From 9770a4bb37639abb6da0764d6ff5729df4399e1e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 4 Apr 2024 16:14:25 +0200 Subject: [PATCH 01/37] Dimension: add index alias attribute --- loki/dimension.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loki/dimension.py b/loki/dimension.py index a85a1a662..fc1386267 100644 --- a/loki/dimension.py +++ b/loki/dimension.py @@ -34,15 +34,19 @@ class Dimension: bounds_aliases : list or tuple of strings String representations of alternative bounds variables that are used to define loop ranges. + index_aliases : list or tuple of strings + String representations of alternative loop index variables associated + with this dimension. """ def __init__(self, name=None, index=None, bounds=None, size=None, aliases=None, - bounds_aliases=None): + bounds_aliases=None, index_aliases=None): self.name = name self._index = index self._bounds = as_tuple(bounds) self._size = size self._aliases = as_tuple(aliases) + self._index_aliases = as_tuple(index_aliases) if bounds_aliases: if len(bounds_aliases) != 2: From 925a71bb62dc63edeb5ba2b0cc3e61b55ccbbe77 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 4 Apr 2024 10:13:43 +0200 Subject: [PATCH 02/37] DeprivatiseStructsTrafo: first implementation --- loki/transformations/__init__.py | 1 + loki/transformations/deprivatise_structs.py | 173 ++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 loki/transformations/deprivatise_structs.py diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 6d4a8f863..f8c6a19f2 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -30,3 +30,4 @@ from loki.transformations.transform_region import * # noqa from loki.transformations.pool_allocator import * # noqa from loki.transformations.utilities import * # noqa +from transformations.deprivatise_structs import * # noqa diff --git a/loki/transformations/deprivatise_structs.py 
b/loki/transformations/deprivatise_structs.py new file mode 100644 index 000000000..8fa98a79e --- /dev/null +++ b/loki/transformations/deprivatise_structs.py @@ -0,0 +1,173 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from loki import ( + Transformation, ProcedureItem, ir, Module, as_tuple, fgen, SymbolAttributes, BasicType, Variable, + RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes +) + +from transformations.single_column_coalesced import SCCBaseTransformation + +__all__ = ['DeprivatiseStructsTransformation'] + +def get_parent_typedef(var, routine): + + if not var.parent.type.dtype.typedef == BasicType.DEFERRED: + return var.parent.type.dtype.typedef + elif not routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef == BasicType.DEFERRED: + return routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef + else: + raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') + +class DeprivatiseStructsTransformation(Transformation): + + + _key = 'DeprivatiseStructsTransformation' + + # This trafo only operates on procedures + item_filter = (ProcedureItem,) + + def __init__(self, horizontal, key=None): + self.horizontal = horizontal + if key: + self._key = key + + def transform_subroutine(self, routine, **kwargs): + + if not (item := kwargs['item']): + raise RuntimeError('Cannot apply DeprivatiseStructsTransformation without item to store definitions') + successors = kwargs.get('successors', ()) + + role = kwargs['role'] + targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets', None))) + + if role == 'kernel': + 
self.process_kernel(routine, item, successors, targets) + if role == 'driver': + self.process_driver(routine, successors) + + @staticmethod + def _get_parkind_suffix(type): + return type.rsplit('_')[1][1:3] + + def _build_parkind_import(self, field_array_module, wrapper_types): + + deferred_type = SymbolAttributes(BasicType.DEFERRED, imported=True) + vars = {Variable(name='JP' + self._get_parkind_suffix(type), type=deferred_type, scope=field_array_module) + for type in wrapper_types} + + return ir.Import(module='PARKIND1', symbols=as_tuple(vars)) + + def _build_field_array_types(self, field_array_module, wrapper_types): + + typedefs = () + for type in wrapper_types: + suff = self._get_parkind_suffix(type) + kind = field_array_module.symbol_map['JP' + suff] + rank = int(type.rsplit('_')[1][0]) + + view_shape = (RangeIndex(children=(None, None)),) * (rank - 1) + array_shape = (RangeIndex(children=(None, None)),) * rank + + if suff == 'IM': + basetype = BasicType.INTEGER + elif suff == 'LM': + basetype = BasicType.LOGICAL + else: + basetype = BasicType.REAL + + pointer_type = SymbolAttributes(basetype, pointer=True, kind=kind, shape=view_shape) + contig_pointer_type = pointer_type.clone(contiguous=True, shape=array_shape) + + pointer_var = Variable(name='P', type=pointer_type, dimensions=view_shape) + contig_pointer_var = pointer_var.clone(name='P_FIELD', type=contig_pointer_type, dimensions=array_shape) + + decls = (ir.VariableDeclaration(symbols=(pointer_var,)),) + decls += (ir.VariableDeclaration(symbols=(contig_pointer_var,)),) + + typedefs += (ir.TypeDef(name=type, body=decls, parent=field_array_module),) + + return typedefs + + def _create_dummy_field_api_defs(self, field_array_mod_imports): + + wrapper_types = {sym.name for imp in field_array_mod_imports for sym in imp.symbols} + + # create dummy module with empty spec + field_array_module = Module(name='FIELD_ARRAY_MODULE', spec=ir.Section(body=())) + + # build parkind1 import + parkind_import = 
self._build_parkind_import(field_array_module, wrapper_types) + field_array_module.spec.append(parkind_import) + + # build dummy type definitions + typedefs = self._build_field_array_types(field_array_module, wrapper_types) + field_array_module.spec.append(typedefs) + + return [field_array_module,] + + @staticmethod + def propagate_defs_to_children(key, definitions, successors): + for child in successors: + child.ir.enrich(definitions) + child.trafo_data.update({key: {'definitions': definitions}}) + + def process_driver(self, routine, successors): + + # create dummy definitions for field_api wrapper types + field_array_mod_imports = [imp for imp in routine.imports if imp.module.lower() == 'field_array_module'] + definitions = [] + if field_array_mod_imports: + definitions += self._create_dummy_field_api_defs(field_array_mod_imports) + + # propagate dummy field_api wrapper definitions to children + self.propagate_defs_to_children(self._key, definitions, successors) + + def process_kernel(self, routine, item, successors, targets): + + # Sanitize the subroutine + resolve_associates(routine) + v_index = SCCBaseTransformation.get_integer_variable(routine, name=self.horizontal.index) + SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) + + if self.horizontal.bounds[0] in routine.variables and self.horizontal.bounds[1] in routine.variables: + _bounds = self.horizontal.bounds + else: + _bounds = self.horizontal._bounds_aliases + SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=_bounds) + + # build list of type-bound array access using the horizontal index + vars = [var for var in FindVariables().visit(routine.body) + if isinstance(var, Array) and var.parents] + vars = [var for var in vars if self.horizontal.index in var.dimensions] + + # remove YDCPG_SL1 members, as these are not memory blocked + vars = [var for var in vars if not 'ydcpg_sl1' in var] + + # build list of type-bound view pointers passed as 
subroutine arguments + for call in [call for call in FindNodes(ir.CallStatement).visit(routine.body) if call.name in targets]: + _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array)} + _args = {a: d for a, d in _args.items() + if any([v in d.shape for v in self.horizontal.size_expressions]) and a.parents} + vars += list(_args) + + # check if array pointers are defined + for var in vars: + typedef = get_parent_typedef(var, routine) + name = var.name_parts[-1] + '_FIELD' + if not name in [v.name for v in typedef.variables]: + raise RuntimeError(f'Container data-type {typedef.name} does not contain *_FIELD pointer') + + # replace view pointers with array pointers + vmap = {var: var.clone(name='%'.join([v for v in var.name_parts[:-1]]) + '%' + var.name_parts[-1] + '_FIELD') + for var in vars} + routine.body = SubstituteExpressions(vmap).visit(routine.body) + + # propagate dummy field_api wrapper definitions to children + definitions = item.trafo_data[self._key]['definitions'] + self.propagate_defs_to_children(self._key, definitions, successors) + From 460b81b9de6c56c7b347b4cea337787f313b8295 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 5 Apr 2024 10:49:41 +0200 Subject: [PATCH 03/37] BlockIndexInjectTransformation: first implementation --- loki/transformations/deprivatise_structs.py | 92 +++++++++++++++++++-- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/loki/transformations/deprivatise_structs.py b/loki/transformations/deprivatise_structs.py index 8fa98a79e..0449189c9 100644 --- a/loki/transformations/deprivatise_structs.py +++ b/loki/transformations/deprivatise_structs.py @@ -7,12 +7,12 @@ from loki import ( Transformation, ProcedureItem, ir, Module, as_tuple, fgen, SymbolAttributes, BasicType, Variable, - RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes + RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes, resolve_type_bound_var ) from 
transformations.single_column_coalesced import SCCBaseTransformation -__all__ = ['DeprivatiseStructsTransformation'] +__all__ = ['DeprivatiseStructsTransformation', 'BlockIndexInjectTransformation'] def get_parent_typedef(var, routine): @@ -142,8 +142,7 @@ def process_kernel(self, routine, item, successors, targets): # build list of type-bound array access using the horizontal index vars = [var for var in FindVariables().visit(routine.body) - if isinstance(var, Array) and var.parents] - vars = [var for var in vars if self.horizontal.index in var.dimensions] + if isinstance(var, Array) and var.parents and self.horizontal.index in getattr(var, 'dimensions', ())] # remove YDCPG_SL1 members, as these are not memory blocked vars = [var for var in vars if not 'ydcpg_sl1' in var] @@ -159,7 +158,7 @@ def process_kernel(self, routine, item, successors, targets): for var in vars: typedef = get_parent_typedef(var, routine) name = var.name_parts[-1] + '_FIELD' - if not name in [v.name for v in typedef.variables]: + if not name in typedef.variable_map: raise RuntimeError(f'Container data-type {typedef.name} does not contain *_FIELD pointer') # replace view pointers with array pointers @@ -171,3 +170,86 @@ def process_kernel(self, routine, item, successors, targets): definitions = item.trafo_data[self._key]['definitions'] self.propagate_defs_to_children(self._key, definitions, successors) + +class BlockIndexInjectTransformation(Transformation): + + _key = 'BlockIndexInjectTransformation' + + # This trafo only operates on procedures + item_filter = (ProcedureItem,) + + def __init__(self, horizontal, block_dim, key=None): + self.horizontal = horizontal + self.block_dim = block_dim + if key: + self._key = key + + def transform_subroutine(self, routine, **kwargs): + + role = kwargs['role'] + targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets', None))) + + if role == 'kernel': + self.process_kernel(routine, targets) + + @staticmethod + def 
get_derived_type_member_rank(a, routine): + typedef = get_parent_typedef(a, routine) + return len(typedef.variable_map[a.name_parts[-1]].shape) + + @staticmethod + def _update_expr_map(var, rank, index): + if getattr(var, 'dimensions', None): + return {var: var.clone(dimensions=var.dimensions + as_tuple(index))} + else: + return {var: + var.clone(dimensions=((RangeIndex(children=(None, None)),) * (rank - 1)) + as_tuple(index))} + + def process_kernel(self, routine, targets): + + # Check that the block index is defined + if self.block_dim.index in routine.variables: + block_index = routine.variable_map[self.block_dim.index] + elif any(i.rsplit('%')[0] in routine.variables for i in self.block_dim._index_aliases): + index_name = [alias for alias in self.block_dim._index_aliases + if alias.rsplit('%')[0] in routine.variables][0] + + child, parent = resolve_type_bound_var(index_name) + block_index = Variable(name=child, parent=parent, scope=routine) + else: + # we skip routines that do not contain the block index + return + + # The logic for callstatement args differs from other array instances in the body, + # so we build a list to filter + call_args = [a for call in FindNodes(ir.CallStatement).visit(routine.body) for a in call.arguments] + + # First get rank mismatched call statement args + vmap = {} + for call in [call for call in FindNodes(ir.CallStatement).visit(routine.body) if call.name in targets]: + _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array) + if any([v in getattr(d, 'shape', None) for v in self.horizontal.size_expressions])} + + for arg, dummy in _args.items(): + if arg.parents: + rank = self.get_derived_type_member_rank(arg, routine) + else: + rank = len(arg.shape) + + if rank - 1 == len(dummy.shape): + vmap.update(self._update_expr_map(arg, rank, block_index)) + + # Now get the rest of the horizontal arrays + for var in [var for var in FindVariables().visit(routine.body) if isinstance(var, Array) + and self.horizontal.index in 
getattr(var, 'dimensions', ()) and not var in call_args]: + + local_rank = len(var.dimensions) + if var.parents: + decl_rank = self.get_derived_type_member_rank(var, routine) + else: + decl_rank = len(var.shape) + + if local_rank == decl_rank - 1: + vmap.update(self._update_expr_map(var, decl_rank, block_index)) + + routine.body = SubstituteExpressions(vmap).visit(routine.body) \ No newline at end of file From 1abe60b249cc6f82022c7180721c85859751b75d Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 5 Apr 2024 11:46:28 +0200 Subject: [PATCH 04/37] Change trafo name to UnprivatiseStructsTransformation --- loki/transformations/__init__.py | 2 +- .../{deprivatise_structs.py => unprivatise_structs.py} | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename loki/transformations/{deprivatise_structs.py => unprivatise_structs.py} (98%) diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index f8c6a19f2..0d3e2bd28 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -30,4 +30,4 @@ from loki.transformations.transform_region import * # noqa from loki.transformations.pool_allocator import * # noqa from loki.transformations.utilities import * # noqa -from transformations.deprivatise_structs import * # noqa +from transformations.unprivatise_structs import * # noqa diff --git a/loki/transformations/deprivatise_structs.py b/loki/transformations/unprivatise_structs.py similarity index 98% rename from loki/transformations/deprivatise_structs.py rename to loki/transformations/unprivatise_structs.py index 0449189c9..8e01a226c 100644 --- a/loki/transformations/deprivatise_structs.py +++ b/loki/transformations/unprivatise_structs.py @@ -12,7 +12,7 @@ from transformations.single_column_coalesced import SCCBaseTransformation -__all__ = ['DeprivatiseStructsTransformation', 'BlockIndexInjectTransformation'] +__all__ = ['UnprivatiseStructsTransformation', 'BlockIndexInjectTransformation'] def 
get_parent_typedef(var, routine): @@ -23,10 +23,10 @@ def get_parent_typedef(var, routine): else: raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') -class DeprivatiseStructsTransformation(Transformation): +class UnprivatiseStructsTransformation(Transformation): - _key = 'DeprivatiseStructsTransformation' + _key = 'UnprivatiseStructsTransformation' # This trafo only operates on procedures item_filter = (ProcedureItem,) From 78e070c48fa4c9ddf0baa9613682357e63aee670 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 5 Apr 2024 15:06:27 +0000 Subject: [PATCH 05/37] SCRIPTS: add unprivatise_structs option to scripts --- cmake/loki_transform.cmake | 6 ++++++ cmake/loki_transform_helpers.cmake | 4 ++++ scripts/loki_transform.py | 12 ++++++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index 8cfebc84b..dc6cad6a8 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -48,6 +48,7 @@ function( loki_transform ) set( options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR TRIM_VECTOR_SECTIONS GLOBAL_VAR_OFFLOAD REMOVE_DERIVED_ARGS INLINE_MEMBERS RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE + UNPRIVATISE_STRUCTS ) set( oneValueArgs COMMAND MODE DIRECTIVE FRONTEND CONFIG BUILDDIR @@ -226,6 +227,7 @@ function( loki_transform_target ) set( options NO_PLAN_SOURCEDIR COPY_UNMODIFIED CPP CPP_PLAN INLINE_MEMBERS RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE TRIM_VECTOR_SECTIONS GLOBAL_VAR_OFFLOAD + UNPRIVATISE_STRUCTS ) set( single_value_args TARGET COMMAND MODE DIRECTIVE FRONTEND CONFIG PLAN ) set( multi_value_args SOURCES HEADERS DEFINITIONS INCLUDES ) @@ -311,6 +313,10 @@ function( loki_transform_target ) list( APPEND _TRANSFORM_OPTIONS GLOBAL_VAR_OFFLOAD ) endif() + if( _PAR_T_UNPRIVATISE_STRUCTS ) + list( APPEND _TRANSFORM_OPTIONS UNPRIVATISE_STRUCTS ) + endif() + loki_transform( COMMAND ${_PAR_T_COMMAND} OUTPUT 
${LOKI_SOURCES_TO_APPEND} diff --git a/cmake/loki_transform_helpers.cmake b/cmake/loki_transform_helpers.cmake index 680ae0e72..092d4fb68 100644 --- a/cmake/loki_transform_helpers.cmake +++ b/cmake/loki_transform_helpers.cmake @@ -120,6 +120,10 @@ macro( _loki_transform_parse_options ) list( APPEND _ARGS --derive-argument-array-shape ) endif() + if( _PAR_UNPRIVATISE_STRUCTS ) + list( APPEND _ARGS --unprivatise-structs ) + endif() + endmacro() ############################################################################## diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index f9b09d660..dbe95ce9f 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -46,6 +46,8 @@ ) from loki.transformations.transpile import FortranCTransformation +from transformations.unprivatise_structs import UnprivatiseStructsTransformation, BlockIndexInjectTransformation + class IdemTransformation(Transformation): """ @@ -120,12 +122,14 @@ def cli(debug): help="Recursively derive explicit shape dimension for argument arrays") @click.option('--eliminate-dead-code/--no-eliminate-dead-code', default=True, help='Perform dead code elimination, where unreachable branches are trimmed from the code.') +@click.option('--unprivatise-structs', is_flag=True, default=False, + help='Unprivatise OpenMP thread-private data structs.') def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, global_var_offload, remove_derived_args, inline_members, inline_marked, resolve_sequence_association, resolve_sequence_association_inlined_calls, - derive_argument_array_shape, eliminate_dead_code + derive_argument_array_shape, eliminate_dead_code, unprivatise_structs ): """ Batch-processing mode for Fortran-to-Fortran transformations that @@ -220,13 +224,17 @@ def convert( ) scheduler.process(transformation=sanitise_trafo) + if unprivatise_structs: + 
scheduler.process( UnprivatiseStructsTransformation(horizontal) ) + scheduler.process( BlockIndexInjectTransformation(horizontal, block_dim) ) + # Perform source-inlining either from CLI arguments or from config inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) if not inline_trafo: inline_trafo = InlineTransformation( inline_internals=inline_members, inline_marked=inline_marked, remove_dead_code=eliminate_dead_code, allowed_aliases=horizontal.index, - resolve_sequence_association=resolve_sequence_association_inlined_calls + resolve_sequence_association=resolve_sequence_association_inlined_calls ) scheduler.process(transformation=inline_trafo) From acc16d520b4cace3709dbd7adefabc3970122dd2 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 5 Apr 2024 17:39:12 +0200 Subject: [PATCH 06/37] BlockIndexInject: cleanup and fixes --- loki/transformations/unprivatise_structs.py | 58 +++++++++++++-------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/loki/transformations/unprivatise_structs.py b/loki/transformations/unprivatise_structs.py index 8e01a226c..b998ba3b2 100644 --- a/loki/transformations/unprivatise_structs.py +++ b/loki/transformations/unprivatise_structs.py @@ -6,8 +6,9 @@ # nor does it submit to any jurisdiction. 
from loki import ( - Transformation, ProcedureItem, ir, Module, as_tuple, fgen, SymbolAttributes, BasicType, Variable, - RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes, resolve_type_bound_var + Transformation, ProcedureItem, ir, Module, as_tuple, SymbolAttributes, BasicType, Variable, + RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes, + resolve_typebound_var ) from transformations.single_column_coalesced import SCCBaseTransformation @@ -205,19 +206,39 @@ def _update_expr_map(var, rank, index): return {var: var.clone(dimensions=((RangeIndex(children=(None, None)),) * (rank - 1)) + as_tuple(index))} - def process_kernel(self, routine, targets): + def get_variable_rank(self, var, routine): + if var.parents: + rank = self.get_derived_type_member_rank(var, routine) + else: + rank = len(var.shape) - # Check that the block index is defined - if self.block_dim.index in routine.variables: - block_index = routine.variable_map[self.block_dim.index] - elif any(i.rsplit('%')[0] in routine.variables for i in self.block_dim._index_aliases): - index_name = [alias for alias in self.block_dim._index_aliases - if alias.rsplit('%')[0] in routine.variables][0] + return rank - child, parent = resolve_type_bound_var(index_name) - block_index = Variable(name=child, parent=parent, scope=routine) + def get_call_arg_rank(self, arg, routine): + if getattr(arg, 'dimensions', None): + # We assume here that the callstatement is free of sequence association + rank = max(1, len([d for d in arg.dimensions if isinstance(d, RangeIndex)])) else: - # we skip routines that do not contain the block index + rank = self.get_variable_rank(arg, routine) + + return rank + + def get_block_index(self, routine): + variable_map = routine.variable_map + if (block_index := variable_map.get(self.block_dim.index, None)): + return block_index + elif any(i.rsplit('%')[0] in variable_map for i in self.block_dim._index_aliases): + index_name = 
[alias for alias in self.block_dim._index_aliases + if alias.rsplit('%')[0] in variable_map][0] + + block_index = resolve_typebound_var(index_name, variable_map) + + return block_index + + def process_kernel(self, routine, targets): + + # we skip routines that do not contain the block index or any known alias + if not (block_index := self.get_block_index(routine)): return # The logic for callstatement args differs from other array instances in the body, @@ -231,11 +252,7 @@ def process_kernel(self, routine, targets): if any([v in getattr(d, 'shape', None) for v in self.horizontal.size_expressions])} for arg, dummy in _args.items(): - if arg.parents: - rank = self.get_derived_type_member_rank(arg, routine) - else: - rank = len(arg.shape) - + rank = self.get_call_arg_rank(arg, routine) if rank - 1 == len(dummy.shape): vmap.update(self._update_expr_map(arg, rank, block_index)) @@ -244,12 +261,9 @@ def process_kernel(self, routine, targets): and self.horizontal.index in getattr(var, 'dimensions', ()) and not var in call_args]: local_rank = len(var.dimensions) - if var.parents: - decl_rank = self.get_derived_type_member_rank(var, routine) - else: - decl_rank = len(var.shape) + decl_rank = self.get_variable_rank(var, routine) if local_rank == decl_rank - 1: vmap.update(self._update_expr_map(var, decl_rank, block_index)) - routine.body = SubstituteExpressions(vmap).visit(routine.body) \ No newline at end of file + routine.body = SubstituteExpressions(vmap).visit(routine.body) From 7d1c5f122ba41a07fd97b8dfce21b3bd991ec399 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 15 Apr 2024 20:53:52 +0200 Subject: [PATCH 07/37] BlockIndexInject: fix call arg rank logic --- loki/transformations/unprivatise_structs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/loki/transformations/unprivatise_structs.py b/loki/transformations/unprivatise_structs.py index b998ba3b2..e6474d469 100644 --- a/loki/transformations/unprivatise_structs.py +++ 
b/loki/transformations/unprivatise_structs.py @@ -215,11 +215,10 @@ def get_variable_rank(self, var, routine): return rank def get_call_arg_rank(self, arg, routine): + rank = self.get_variable_rank(arg, routine) if getattr(arg, 'dimensions', None): # We assume here that the callstatement is free of sequence association - rank = max(1, len([d for d in arg.dimensions if isinstance(d, RangeIndex)])) - else: - rank = self.get_variable_rank(arg, routine) + rank = rank - len([d for d in arg.dimensions if not isinstance(d, RangeIndex)]) return rank From 95b71210b95994e4a94e4320131fd1617ae62618 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 17 Apr 2024 21:11:29 +0200 Subject: [PATCH 08/37] Various improvements to UnprivatiseStructsTrafo and BlockIndexInjectTrafo --- loki/transformations/unprivatise_structs.py | 96 ++++++++++----------- scripts/loki_transform.py | 2 +- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/loki/transformations/unprivatise_structs.py b/loki/transformations/unprivatise_structs.py index e6474d469..6c8d447fd 100644 --- a/loki/transformations/unprivatise_structs.py +++ b/loki/transformations/unprivatise_structs.py @@ -8,22 +8,13 @@ from loki import ( Transformation, ProcedureItem, ir, Module, as_tuple, SymbolAttributes, BasicType, Variable, RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes, - resolve_typebound_var + resolve_typebound_var, recursive_expression_map_update ) from transformations.single_column_coalesced import SCCBaseTransformation __all__ = ['UnprivatiseStructsTransformation', 'BlockIndexInjectTransformation'] -def get_parent_typedef(var, routine): - - if not var.parent.type.dtype.typedef == BasicType.DEFERRED: - return var.parent.type.dtype.typedef - elif not routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef == BasicType.DEFERRED: - return routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef - else: - raise RuntimeError(f'Container data-type 
{var.parent.type.dtype.name} not enriched') - class UnprivatiseStructsTransformation(Transformation): @@ -37,6 +28,16 @@ def __init__(self, horizontal, key=None): if key: self._key = key + @staticmethod + def get_parent_typedef(var, routine): + + if not var.parent.type.dtype.typedef == BasicType.DEFERRED: + return var.parent.type.dtype.typedef + elif not routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef == BasicType.DEFERRED: + return routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef + else: + raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') + def transform_subroutine(self, routine, **kwargs): if not (item := kwargs['item']): @@ -128,6 +129,17 @@ def process_driver(self, routine, successors): # propagate dummy field_api wrapper definitions to children self.propagate_defs_to_children(self._key, definitions, successors) + def build_ydvars_global_gfl_ptr(self, var): + if (parent := var.parent): + parent = self.build_ydvars_global_gfl_ptr(parent) + + _type = var.type + if 'gfl_ptr' in var.name.lower().split('%')[-1]: + _type = parent.type.dtype.typedef.variable_map['gfl_ptr_g'].type + + return var.clone(name=var.name.upper().replace('GFL_PTR', 'GFL_PTR_G'), + parent=parent, type=_type) + def process_kernel(self, routine, item, successors, targets): # Sanitize the subroutine @@ -155,16 +167,15 @@ def process_kernel(self, routine, item, successors, targets): if any([v in d.shape for v in self.horizontal.size_expressions]) and a.parents} vars += list(_args) - # check if array pointers are defined - for var in vars: - typedef = get_parent_typedef(var, routine) - name = var.name_parts[-1] + '_FIELD' - if not name in typedef.variable_map: - raise RuntimeError(f'Container data-type {typedef.name} does not contain *_FIELD pointer') - - # replace view pointers with array pointers - vmap = {var: var.clone(name='%'.join([v for v in var.name_parts[:-1]]) + '%' + var.name_parts[-1] + '_FIELD') + # replace per-block 
view pointers with full field pointers + vmap = {var: var.clone(name=var.name_parts[-1] + '_FIELD', + type=self.get_parent_typedef(var, routine).variable_map[var.name_parts[-1] + '_FIELD'].type) for var in vars} + + # replace thread-private GFL_PTR with global + vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) + for v in FindVariables().visit(routine.body) if 'ydvars%gfl_ptr' in v.name.lower()}) + vmap = recursive_expression_map_update(vmap) routine.body = SubstituteExpressions(vmap).visit(routine.body) # propagate dummy field_api wrapper definitions to children @@ -179,8 +190,7 @@ class BlockIndexInjectTransformation(Transformation): # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, horizontal, block_dim, key=None): - self.horizontal = horizontal + def __init__(self, block_dim, key=None): self.block_dim = block_dim if key: self._key = key @@ -193,11 +203,6 @@ def transform_subroutine(self, routine, **kwargs): if role == 'kernel': self.process_kernel(routine, targets) - @staticmethod - def get_derived_type_member_rank(a, routine): - typedef = get_parent_typedef(a, routine) - return len(typedef.variable_map[a.name_parts[-1]].shape) - @staticmethod def _update_expr_map(var, rank, index): if getattr(var, 'dimensions', None): @@ -206,16 +211,9 @@ def _update_expr_map(var, rank, index): return {var: var.clone(dimensions=((RangeIndex(children=(None, None)),) * (rank - 1)) + as_tuple(index))} - def get_variable_rank(self, var, routine): - if var.parents: - rank = self.get_derived_type_member_rank(var, routine) - else: - rank = len(var.shape) - - return rank - - def get_call_arg_rank(self, arg, routine): - rank = self.get_variable_rank(arg, routine) + @staticmethod + def get_call_arg_rank(arg): + rank = len(arg.shape) if getattr(arg, 'shape', None) else 0 if getattr(arg, 'dimensions', None): # We assume here that the callstatement is free of sequence association rank = rank - len([d for d in arg.dimensions if not 
isinstance(d, RangeIndex)]) @@ -240,27 +238,29 @@ def process_kernel(self, routine, targets): if not (block_index := self.get_block_index(routine)): return - # The logic for callstatement args differs from other array instances in the body, + # The logic for callstatement args differs from other variables in the body, # so we build a list to filter call_args = [a for call in FindNodes(ir.CallStatement).visit(routine.body) for a in call.arguments] # First get rank mismatched call statement args vmap = {} for call in [call for call in FindNodes(ir.CallStatement).visit(routine.body) if call.name in targets]: - _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array) - if any([v in getattr(d, 'shape', None) for v in self.horizontal.size_expressions])} - - for arg, dummy in _args.items(): - rank = self.get_call_arg_rank(arg, routine) - if rank - 1 == len(dummy.shape): - vmap.update(self._update_expr_map(arg, rank, block_index)) + for dummy, arg in call.arg_map.items(): + arg_rank = self.get_call_arg_rank(arg) + dummy_rank = len(dummy.shape) if getattr(dummy, 'shape', None) else 0 + if arg_rank - 1 == dummy_rank: + vmap.update(self._update_expr_map(arg, arg_rank, block_index)) - # Now get the rest of the horizontal arrays - for var in [var for var in FindVariables().visit(routine.body) if isinstance(var, Array) - and self.horizontal.index in getattr(var, 'dimensions', ()) and not var in call_args]: + # Now get the rest of the variables + for var in [var for var in FindVariables().visit(routine.body) + if getattr(var, 'dimensions', None) and not var in call_args]: local_rank = len(var.dimensions) - decl_rank = self.get_variable_rank(var, routine) + decl_rank = local_rank + # we assume here that all derived-type components we wish to transform + # have been parsed + if getattr(var, 'shape', None): + decl_rank = len(var.shape) if local_rank == decl_rank - 1: vmap.update(self._update_expr_map(var, decl_rank, block_index)) diff --git a/scripts/loki_transform.py 
b/scripts/loki_transform.py index dbe95ce9f..e0e4b87dc 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -226,7 +226,7 @@ def convert( if unprivatise_structs: scheduler.process( UnprivatiseStructsTransformation(horizontal) ) - scheduler.process( BlockIndexInjectTransformation(horizontal, block_dim) ) + scheduler.process( BlockIndexInjectTransformation(block_dim) ) # Perform source-inlining either from CLI arguments or from config inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) From 4114ae6aac58946e1749f6687fdef8d3aa47b08e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 18 Apr 2024 17:23:20 +0200 Subject: [PATCH 09/37] Add configurable exclude list to unprivatise/blockinject trafos --- loki/transformations/unprivatise_structs.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/loki/transformations/unprivatise_structs.py b/loki/transformations/unprivatise_structs.py index 6c8d447fd..d09798f13 100644 --- a/loki/transformations/unprivatise_structs.py +++ b/loki/transformations/unprivatise_structs.py @@ -23,8 +23,9 @@ class UnprivatiseStructsTransformation(Transformation): # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, horizontal, key=None): + def __init__(self, horizontal, exclude=[], key=None): self.horizontal = horizontal + self.exclude = exclude if key: self._key = key @@ -157,9 +158,6 @@ def process_kernel(self, routine, item, successors, targets): vars = [var for var in FindVariables().visit(routine.body) if isinstance(var, Array) and var.parents and self.horizontal.index in getattr(var, 'dimensions', ())] - # remove YDCPG_SL1 members, as these are not memory blocked - vars = [var for var in vars if not 'ydcpg_sl1' in var] - # build list of type-bound view pointers passed as subroutine arguments for call in [call for call in FindNodes(ir.CallStatement).visit(routine.body) if call.name in targets]: _args = {a: d for d, a 
in call.arg_map.items() if isinstance(d, Array)} @@ -176,6 +174,11 @@ def process_kernel(self, routine, item, successors, targets): vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) for v in FindVariables().visit(routine.body) if 'ydvars%gfl_ptr' in v.name.lower()}) vmap = recursive_expression_map_update(vmap) + + # filter out arrays marked for exclusion + vmap = {k: v for k, v in vmap.items() if not any(e in k for e in self.exclude)} + + # finally perform the substitution routine.body = SubstituteExpressions(vmap).visit(routine.body) # propagate dummy field_api wrapper definitions to children @@ -190,8 +193,9 @@ class BlockIndexInjectTransformation(Transformation): # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, block_dim, key=None): + def __init__(self, block_dim, exclude=[], key=None): self.block_dim = block_dim + self.exclude = exclude if key: self._key = key @@ -265,4 +269,7 @@ def process_kernel(self, routine, targets): if local_rank == decl_rank - 1: vmap.update(self._update_expr_map(var, decl_rank, block_index)) + # filter out arrays marked for exclusion + vmap = {k: v for k, v in vmap.items() if not any(e in k for e in self.exclude)} + routine.body = SubstituteExpressions(vmap).visit(routine.body) From f67806da23a95ff30550ce4e5acf468f8ceafe79 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 18 Apr 2024 17:23:52 +0200 Subject: [PATCH 10/37] SCRIPTS: read unprivatise pipeline from config --- scripts/loki_transform.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index e0e4b87dc..fad250fae 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -46,8 +46,6 @@ ) from loki.transformations.transpile import FortranCTransformation -from transformations.unprivatise_structs import UnprivatiseStructsTransformation, BlockIndexInjectTransformation - class IdemTransformation(Transformation): """ @@ -225,8 
+223,8 @@ def convert( scheduler.process(transformation=sanitise_trafo) if unprivatise_structs: - scheduler.process( UnprivatiseStructsTransformation(horizontal) ) - scheduler.process( BlockIndexInjectTransformation(block_dim) ) + assert config.pipelines['unprivatise_structs'] + scheduler.process( config.pipelines['unprivatise_structs'] ) # Perform source-inlining either from CLI arguments or from config inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) From e60eca67d6cfcc2dae5471f85d5e3f435dad70c6 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 19 Apr 2024 10:38:09 +0200 Subject: [PATCH 11/37] Adapt unprivatise/blockinject trafos so they can also be run on driver-layer code --- loki/transformations/unprivatise_structs.py | 130 +++++++++++--------- 1 file changed, 72 insertions(+), 58 deletions(-) diff --git a/loki/transformations/unprivatise_structs.py b/loki/transformations/unprivatise_structs.py index d09798f13..2f69d6531 100644 --- a/loki/transformations/unprivatise_structs.py +++ b/loki/transformations/unprivatise_structs.py @@ -23,21 +23,20 @@ class UnprivatiseStructsTransformation(Transformation): # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, horizontal, exclude=[], key=None): + def __init__(self, horizontal, exclude=(), key=None): self.horizontal = horizontal self.exclude = exclude if key: - self._key = key + self._key = key @staticmethod - def get_parent_typedef(var, routine): + def get_parent_typedef(var, symbol_map): if not var.parent.type.dtype.typedef == BasicType.DEFERRED: return var.parent.type.dtype.typedef - elif not routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef == BasicType.DEFERRED: - return routine.symbol_map[var.parent.type.dtype.name].type.dtype.typedef - else: - raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') + if not symbol_map[var.parent.type.dtype.name].type.dtype.typedef == BasicType.DEFERRED: + 
return symbol_map[var.parent.type.dtype.name].type.dtype.typedef + raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') def transform_subroutine(self, routine, **kwargs): @@ -51,27 +50,27 @@ def transform_subroutine(self, routine, **kwargs): if role == 'kernel': self.process_kernel(routine, item, successors, targets) if role == 'driver': - self.process_driver(routine, successors) + self.process_driver(routine, successors) @staticmethod - def _get_parkind_suffix(type): - return type.rsplit('_')[1][1:3] + def _get_parkind_suffix(_type): + return _type.rsplit('_')[1][1:3] def _build_parkind_import(self, field_array_module, wrapper_types): deferred_type = SymbolAttributes(BasicType.DEFERRED, imported=True) - vars = {Variable(name='JP' + self._get_parkind_suffix(type), type=deferred_type, scope=field_array_module) - for type in wrapper_types} + _vars = {Variable(name='JP' + self._get_parkind_suffix(t), type=deferred_type, scope=field_array_module) + for t in wrapper_types} - return ir.Import(module='PARKIND1', symbols=as_tuple(vars)) + return ir.Import(module='PARKIND1', symbols=as_tuple(_vars)) def _build_field_array_types(self, field_array_module, wrapper_types): typedefs = () - for type in wrapper_types: - suff = self._get_parkind_suffix(type) + for _type in wrapper_types: + suff = self._get_parkind_suffix(_type) kind = field_array_module.symbol_map['JP' + suff] - rank = int(type.rsplit('_')[1][0]) + rank = int(_type.rsplit('_')[1][0]) view_shape = (RangeIndex(children=(None, None)),) * (rank - 1) array_shape = (RangeIndex(children=(None, None)),) * rank @@ -87,12 +86,12 @@ def _build_field_array_types(self, field_array_module, wrapper_types): contig_pointer_type = pointer_type.clone(contiguous=True, shape=array_shape) pointer_var = Variable(name='P', type=pointer_type, dimensions=view_shape) - contig_pointer_var = pointer_var.clone(name='P_FIELD', type=contig_pointer_type, dimensions=array_shape) + contig_pointer_var = 
pointer_var.clone(name='P_FIELD', type=contig_pointer_type, dimensions=array_shape) # pylint: disable=no-member decls = (ir.VariableDeclaration(symbols=(pointer_var,)),) decls += (ir.VariableDeclaration(symbols=(contig_pointer_var,)),) - typedefs += (ir.TypeDef(name=type, body=decls, parent=field_array_module),) + typedefs += (ir.TypeDef(name=_type, body=decls, parent=field_array_module),) return typedefs @@ -130,6 +129,8 @@ def process_driver(self, routine, successors): # propagate dummy field_api wrapper definitions to children self.propagate_defs_to_children(self._key, definitions, successors) + #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer + def build_ydvars_global_gfl_ptr(self, var): if (parent := var.parent): parent = self.build_ydvars_global_gfl_ptr(parent) @@ -141,50 +142,57 @@ def build_ydvars_global_gfl_ptr(self, var): return var.clone(name=var.name.upper().replace('GFL_PTR', 'GFL_PTR_G'), parent=parent, type=_type) - def process_kernel(self, routine, item, successors, targets): - - # Sanitize the subroutine - resolve_associates(routine) - v_index = SCCBaseTransformation.get_integer_variable(routine, name=self.horizontal.index) - SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) - - if self.horizontal.bounds[0] in routine.variables and self.horizontal.bounds[1] in routine.variables: - _bounds = self.horizontal.bounds - else: - _bounds = self.horizontal._bounds_aliases - SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=_bounds) + def process_body(self, body, symbol_map, definitions, successors, targets): # build list of type-bound array access using the horizontal index - vars = [var for var in FindVariables().visit(routine.body) + _vars = [var for var in FindVariables().visit(body) if isinstance(var, Array) and var.parents and self.horizontal.index in getattr(var, 'dimensions', ())] # build list of type-bound view pointers passed as subroutine 
arguments - for call in [call for call in FindNodes(ir.CallStatement).visit(routine.body) if call.name in targets]: + for call in [call for call in FindNodes(ir.CallStatement).visit(body) if call.name in targets]: _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array)} _args = {a: d for a, d in _args.items() - if any([v in d.shape for v in self.horizontal.size_expressions]) and a.parents} - vars += list(_args) + if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents} + _vars += list(_args) # replace per-block view pointers with full field pointers - vmap = {var: var.clone(name=var.name_parts[-1] + '_FIELD', - type=self.get_parent_typedef(var, routine).variable_map[var.name_parts[-1] + '_FIELD'].type) - for var in vars} + vmap = {var: + var.clone(name=var.name_parts[-1] + '_FIELD', + type=self.get_parent_typedef(var, symbol_map).variable_map[var.name_parts[-1] + '_FIELD'].type) + for var in _vars} # replace thread-private GFL_PTR with global vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) - for v in FindVariables().visit(routine.body) if 'ydvars%gfl_ptr' in v.name.lower()}) + for v in FindVariables().visit(body) if 'ydvars%gfl_ptr' in v.name.lower()}) vmap = recursive_expression_map_update(vmap) # filter out arrays marked for exclusion vmap = {k: v for k, v in vmap.items() if not any(e in k for e in self.exclude)} - # finally perform the substitution - routine.body = SubstituteExpressions(vmap).visit(routine.body) - # propagate dummy field_api wrapper definitions to children - definitions = item.trafo_data[self._key]['definitions'] self.propagate_defs_to_children(self._key, definitions, successors) + # finally we perform the substitution + return SubstituteExpressions(vmap).visit(body) + + + def process_kernel(self, routine, item, successors, targets): + + # Sanitize the subroutine + resolve_associates(routine) + v_index = SCCBaseTransformation.get_integer_variable(routine, name=self.horizontal.index) + 
SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) + + if self.horizontal.bounds[0] in routine.variables and self.horizontal.bounds[1] in routine.variables: + _bounds = self.horizontal.bounds + else: + _bounds = self.horizontal._bounds_aliases + SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=_bounds) + + # for kernels we process the entire body + routine.body = self.process_body(routine.body, routine.symbol_map, item.trafo_data[self._key]['definitions'], + successors, targets) + class BlockIndexInjectTransformation(Transformation): @@ -193,11 +201,11 @@ class BlockIndexInjectTransformation(Transformation): # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, block_dim, exclude=[], key=None): + def __init__(self, block_dim, exclude=(), key=None): self.block_dim = block_dim self.exclude = exclude if key: - self._key = key + self._key = key def transform_subroutine(self, routine, **kwargs): @@ -207,13 +215,14 @@ def transform_subroutine(self, routine, **kwargs): if role == 'kernel': self.process_kernel(routine, targets) + #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer + @staticmethod def _update_expr_map(var, rank, index): if getattr(var, 'dimensions', None): return {var: var.clone(dimensions=var.dimensions + as_tuple(index))} - else: - return {var: - var.clone(dimensions=((RangeIndex(children=(None, None)),) * (rank - 1)) + as_tuple(index))} + return {var: + var.clone(dimensions=((RangeIndex(children=(None, None)),) * (rank - 1)) + as_tuple(index))} @staticmethod def get_call_arg_rank(arg): @@ -228,7 +237,7 @@ def get_block_index(self, routine): variable_map = routine.variable_map if (block_index := variable_map.get(self.block_dim.index, None)): return block_index - elif any(i.rsplit('%')[0] in variable_map for i in self.block_dim._index_aliases): + if any(i.rsplit('%')[0] in variable_map for i in 
self.block_dim._index_aliases): index_name = [alias for alias in self.block_dim._index_aliases if alias.rsplit('%')[0] in variable_map][0] @@ -236,19 +245,14 @@ def get_block_index(self, routine): return block_index - def process_kernel(self, routine, targets): - - # we skip routines that do not contain the block index or any known alias - if not (block_index := self.get_block_index(routine)): - return - + def process_body(self, body, block_index, targets): # The logic for callstatement args differs from other variables in the body, # so we build a list to filter - call_args = [a for call in FindNodes(ir.CallStatement).visit(routine.body) for a in call.arguments] + call_args = [a for call in FindNodes(ir.CallStatement).visit(body) for a in call.arguments] # First get rank mismatched call statement args vmap = {} - for call in [call for call in FindNodes(ir.CallStatement).visit(routine.body) if call.name in targets]: + for call in [call for call in FindNodes(ir.CallStatement).visit(body) if call.name in targets]: for dummy, arg in call.arg_map.items(): arg_rank = self.get_call_arg_rank(arg) dummy_rank = len(dummy.shape) if getattr(dummy, 'shape', None) else 0 @@ -256,7 +260,7 @@ def process_kernel(self, routine, targets): vmap.update(self._update_expr_map(arg, arg_rank, block_index)) # Now get the rest of the variables - for var in [var for var in FindVariables().visit(routine.body) + for var in [var for var in FindVariables().visit(body) if getattr(var, 'dimensions', None) and not var in call_args]: local_rank = len(var.dimensions) @@ -272,4 +276,14 @@ def process_kernel(self, routine, targets): # filter out arrays marked for exclusion vmap = {k: v for k, v in vmap.items() if not any(e in k for e in self.exclude)} - routine.body = SubstituteExpressions(vmap).visit(routine.body) + # finally we perform the substitution + return SubstituteExpressions(vmap).visit(body) + + def process_kernel(self, routine, targets): + + # we skip routines that do not contain the block 
index or any known alias + if not (block_index := self.get_block_index(routine)): + return + + # for kernels we process the entire subroutine body + routine.body = self.process_body(routine.body, block_index, targets) From 9eacc8ee04b4d547b378ee2fd5f56c4677adc349 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 22 Apr 2024 15:04:19 +0200 Subject: [PATCH 12/37] Rename file to block_index_transformations.py --- loki/transformations/__init__.py | 2 +- .../{unprivatise_structs.py => block_index_transformations.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename loki/transformations/{unprivatise_structs.py => block_index_transformations.py} (100%) diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 0d3e2bd28..bfe6e25fb 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -30,4 +30,4 @@ from loki.transformations.transform_region import * # noqa from loki.transformations.pool_allocator import * # noqa from loki.transformations.utilities import * # noqa -from transformations.unprivatise_structs import * # noqa +from transformations.block_index_transformations import * # noqa diff --git a/loki/transformations/unprivatise_structs.py b/loki/transformations/block_index_transformations.py similarity index 100% rename from loki/transformations/unprivatise_structs.py rename to loki/transformations/block_index_transformations.py From 7ffb876ea2adfc29fa710fa9259315d6318110f0 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 22 Apr 2024 16:24:48 +0200 Subject: [PATCH 13/37] BlockViewToFieldViewTrafo: add documentation --- .../block_index_transformations.py | 55 +++++++++++++++++-- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 2f69d6531..1930ab51e 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ 
-13,14 +13,48 @@ from transformations.single_column_coalesced import SCCBaseTransformation -__all__ = ['UnprivatiseStructsTransformation', 'BlockIndexInjectTransformation'] +__all__ = ['BlockViewToFieldViewTransformation', 'BlockIndexInjectTransformation'] -class UnprivatiseStructsTransformation(Transformation): +class BlockViewToFieldViewTransformation(Transformation): + """ + A very IFS-specific transformation to replace per-block, i.e. per OpenMP-thread, view pointers with per-field + view pointers. It should be noted that this transformation only replaces the view pointers but does not actually + insert the block index into the promoted view pointers. Therefore this transformation must always be followed by + the :any:`BlockIndexInjectTransformation`. + For example, the following code: - _key = 'UnprivatiseStructsTransformation' + .. code-block:: fortran + + do jlon=1,nproma + mystruct%p(jlon,:) = 0. + enddo + + is transformed to: + + .. code-block:: fortran + + do jlon=1,nproma + mystruct%p_field(jlon,:) = 0. + enddo + + Where the rank of ``my_struct%p_field`` is one greater than that of ``my_struct%p``. + + Parameters + ---------- + horizontal : :any:`Dimension` + :any:`Dimension` object describing the variable conventions used in code + to define the horizontal data dimension and iteration space. + exclude : tuple + List of data structures to be intentionally excluded from this transformation. This list is intended + primarily for data structures that are not memory-blocked. 
+ key : str, optional + Specify a different identifier under which trafo_data is stored + """ + + _key = 'BlockViewToFieldViewTransformation' + """Default identifier for trafo_data entry""" - # This trafo only operates on procedures item_filter = (ProcedureItem,) def __init__(self, horizontal, exclude=(), key=None): @@ -31,6 +65,7 @@ def __init__(self, horizontal, exclude=(), key=None): @staticmethod def get_parent_typedef(var, symbol_map): + """Utility method to retrieve derived-type definition of parent type.""" if not var.parent.type.dtype.typedef == BasicType.DEFERRED: return var.parent.type.dtype.typedef @@ -65,6 +100,9 @@ def _build_parkind_import(self, field_array_module, wrapper_types): return ir.Import(module='PARKIND1', symbols=as_tuple(_vars)) def _build_field_array_types(self, field_array_module, wrapper_types): + """ + Build FIELD_RANKSUFF_ARRAY type-definitions. + """ typedefs = () for _type in wrapper_types: @@ -96,6 +134,9 @@ def _build_field_array_types(self, field_array_module, wrapper_types): return typedefs def _create_dummy_field_api_defs(self, field_array_mod_imports): + """ + Create dummy definitions for FIELD_API wrapper-types to enrich typedefs. + """ wrapper_types = {sym.name for imp in field_array_mod_imports for sym in imp.symbols} @@ -114,6 +155,10 @@ def _create_dummy_field_api_defs(self, field_array_mod_imports): @staticmethod def propagate_defs_to_children(key, definitions, successors): + """ + Enrich all successors with the dummy FIELD_API definitions. 
+ """ + for child in successors: child.ir.enrich(definitions) child.trafo_data.update({key: {'definitions': definitions}}) @@ -132,6 +177,8 @@ def process_driver(self, routine, successors): #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer def build_ydvars_global_gfl_ptr(self, var): + """Replace accesses to thread-local ``YDVARS%GFL_PTR`` with global ``YDVARS%GFL_PTR_G``.""" + if (parent := var.parent): parent = self.build_ydvars_global_gfl_ptr(parent) From 2712538246c8760c5bdd860251c68afe9bf28979 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 22 Apr 2024 17:49:25 +0200 Subject: [PATCH 14/37] BlockIndexInjectTrafo: add documentation --- .../block_index_transformations.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 1930ab51e..b3ead07f5 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -242,8 +242,83 @@ def process_kernel(self, routine, item, successors, targets): class BlockIndexInjectTransformation(Transformation): + """ + A transformation pass to inject the block-index in arrays promoted by a previous transformation pass. As such, + this transformation also relies on the block-index, or a known alias, being *already* present in routines that + are to be transformed. + + For array access in a :any:`Subroutine` body, it operates by comparing the local shape of an array with its + declared shape. If the local shape is of rank one less than the declared shape, then the block-index is appended + to the array's dimensions. + + For :any:`CallStatement` arguments, if the rank of the argument is one less than that of the corresponding + dummy-argument, the block-index is appended to the argument's dimensions. 
It should be noted that this logic relies on + the :any:`CallStatement` being free of any sequence-association. + + For example, the following code: + + .. code-block:: fortran + + subroutine kernel1(nblks, ...) + ... + integer, intent(in) :: nblks + integer :: ibl + real :: var(jlon,nlev,nblks) + + do ibl=1,nblks + do jlon=1,nproma + var(jlon,:) = 0. + enddo + + call kernel2(var,...) + enddo + ... + end subroutine kernel1 + + subroutine kernel2(var, ...) + ... + real :: var(jlon,nlev) + end subroutine kernel2 + + is transformed to: + + .. code-block:: fortran + + subroutine kernel1(nblks, ...) + ... + integer, intent(in) :: nblks + integer :: ibl + real :: var(jlon,nlev,nblks) + + do ibl=1,nblks + do jlon=1,nproma + var(jlon,:,ibl) = 0. + enddo + + call kernel2(var(:,:,ibl),...) + enddo + ... + end subroutine kernel1 + + subroutine kernel2(var, ...) + ... + real :: var(jlon,nlev) + end subroutine kernel2 + + Parameters + ---------- + block_dim : :any:`Dimension` + :any:`Dimension` object describing the variable conventions used in code + to define the blocking data dimension and iteration space. + exclude : tuple + List of data structures to be intentionally excluded from this transformation. This list is intended + primarily for data structures that are not memory-blocked. + key : str, optional + Specify a different identifier under which trafo_data is stored + """ _key = 'BlockIndexInjectTransformation' + """Default identifier for trafo_data entry""" # This trafo only operates on procedures item_filter = (ProcedureItem,) @@ -266,6 +341,10 @@ def transform_subroutine(self, routine, **kwargs): @staticmethod def _update_expr_map(var, rank, index): + """ + Return a map with the block-index appended to the variable's dimensions. 
+ """ + if getattr(var, 'dimensions', None): return {var: var.clone(dimensions=var.dimensions + as_tuple(index))} return {var: @@ -273,6 +352,10 @@ def _update_expr_map(var, rank, index): @staticmethod def get_call_arg_rank(arg): + """ + Utility to retrieve the local rank of a :any:`CallStatement` argument. + """ + rank = len(arg.shape) if getattr(arg, 'shape', None) else 0 if getattr(arg, 'dimensions', None): # We assume here that the callstatement is free of sequence association @@ -281,6 +364,10 @@ def get_call_arg_rank(arg): return rank def get_block_index(self, routine): + """ + Utility to retrieve the block-index loop induction variable. + """ + variable_map = routine.variable_map if (block_index := variable_map.get(self.block_dim.index, None)): return block_index From 2fcdc08f4a98bea5f0619330bb2291810ea1dc31 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 22 Apr 2024 17:53:14 +0200 Subject: [PATCH 15/37] Change name of loki-transform script arg --- cmake/loki_transform.cmake | 8 ++++---- cmake/loki_transform_helpers.cmake | 4 ++-- scripts/loki_transform.py | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index dc6cad6a8..6083b4730 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -48,7 +48,7 @@ function( loki_transform ) set( options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR TRIM_VECTOR_SECTIONS GLOBAL_VAR_OFFLOAD REMOVE_DERIVED_ARGS INLINE_MEMBERS RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE - UNPRIVATISE_STRUCTS + BLOCKVIEW_TO_FIELDVIEW ) set( oneValueArgs COMMAND MODE DIRECTIVE FRONTEND CONFIG BUILDDIR @@ -227,7 +227,7 @@ function( loki_transform_target ) set( options NO_PLAN_SOURCEDIR COPY_UNMODIFIED CPP CPP_PLAN INLINE_MEMBERS RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE TRIM_VECTOR_SECTIONS GLOBAL_VAR_OFFLOAD - UNPRIVATISE_STRUCTS + BLOCKVIEW_TO_FIELDVIEW ) set( single_value_args TARGET COMMAND MODE 
DIRECTIVE FRONTEND CONFIG PLAN ) set( multi_value_args SOURCES HEADERS DEFINITIONS INCLUDES ) @@ -313,8 +313,8 @@ function( loki_transform_target ) list( APPEND _TRANSFORM_OPTIONS GLOBAL_VAR_OFFLOAD ) endif() - if( _PAR_T_UNPRIVATISE_STRUCTS ) - list( APPEND _TRANSFORM_OPTIONS UNPRIVATISE_STRUCTS ) + if( _PAR_T_BLOCKVIEW_TO_FIELDVIEW ) + list( APPEND _TRANSFORM_OPTIONS BLOCKVIEW_TO_FIELDVIEW ) endif() loki_transform( diff --git a/cmake/loki_transform_helpers.cmake b/cmake/loki_transform_helpers.cmake index 092d4fb68..a95416478 100644 --- a/cmake/loki_transform_helpers.cmake +++ b/cmake/loki_transform_helpers.cmake @@ -120,8 +120,8 @@ macro( _loki_transform_parse_options ) list( APPEND _ARGS --derive-argument-array-shape ) endif() - if( _PAR_UNPRIVATISE_STRUCTS ) - list( APPEND _ARGS --unprivatise-structs ) + if( _PAR_BLOCKVIEW_TO_FIELDVIEW ) + list( APPEND _ARGS --blockview-to-fieldview ) endif() endmacro() diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index fad250fae..32a0e9438 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -120,14 +120,14 @@ def cli(debug): help="Recursively derive explicit shape dimension for argument arrays") @click.option('--eliminate-dead-code/--no-eliminate-dead-code', default=True, help='Perform dead code elimination, where unreachable branches are trimmed from the code.') -@click.option('--unprivatise-structs', is_flag=True, default=False, - help='Unprivatise OpenMP thread-private data structs.') +@click.option('--blockview-to-fieldview', is_flag=True, default=False, + help='Replace per-block view pointers with per-field view pointers.') def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, global_var_offload, remove_derived_args, inline_members, inline_marked, resolve_sequence_association, resolve_sequence_association_inlined_calls, - derive_argument_array_shape, 
eliminate_dead_code, unprivatise_structs + derive_argument_array_shape, eliminate_dead_code, blockview_to_fieldview ): """ Batch-processing mode for Fortran-to-Fortran transformations that @@ -222,9 +222,9 @@ def convert( ) scheduler.process(transformation=sanitise_trafo) - if unprivatise_structs: - assert config.pipelines['unprivatise_structs'] - scheduler.process( config.pipelines['unprivatise_structs'] ) + if blockview_to_fieldview: + assert config.pipelines['blockview_to_fieldview'] + scheduler.process( config.pipelines['blockview_to_fieldview'] ) # Perform source-inlining either from CLI arguments or from config inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) From dd8f0d456001dfa4035b326d8eb5c02ec5f71d29 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 24 Apr 2024 09:39:20 +0200 Subject: [PATCH 16/37] Make exclude_arrays configurable via the scheduler --- .../block_index_transformations.py | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index b3ead07f5..5bc8a3c8b 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -38,16 +38,15 @@ class BlockViewToFieldViewTransformation(Transformation): mystruct%p_field(jlon,:) = 0. enddo - Where the rank of ``my_struct%p_field`` is one greater than that of ``my_struct%p``. + Where the rank of ``my_struct%p_field`` is one greater than that of ``my_struct%p``. Specific arrays in individual + routines can also be marked for exclusion from this transformation by assigning them to the `exclude_arrays` list + in the :any:`SchedulerConfig`. Parameters ---------- horizontal : :any:`Dimension` :any:`Dimension` object describing the variable conventions used in code to define the horizontal data dimension and iteration space. 
- exclude : tuple - List of data structures to be intentionally excluded from this transformation. This list is intended - primarily for data structures that are not memory-blocked. key : str, optional Specify a different identifier under which trafo_data is stored """ @@ -57,9 +56,8 @@ class BlockViewToFieldViewTransformation(Transformation): item_filter = (ProcedureItem,) - def __init__(self, horizontal, exclude=(), key=None): + def __init__(self, horizontal, key=None): self.horizontal = horizontal - self.exclude = exclude if key: self._key = key @@ -82,8 +80,10 @@ def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets', None))) + exclude_arrays = item.config.get('exclude_arrays', []) + if role == 'kernel': - self.process_kernel(routine, item, successors, targets) + self.process_kernel(routine, item, successors, targets, exclude_arrays) if role == 'driver': self.process_driver(routine, successors) @@ -189,7 +189,7 @@ def build_ydvars_global_gfl_ptr(self, var): return var.clone(name=var.name.upper().replace('GFL_PTR', 'GFL_PTR_G'), parent=parent, type=_type) - def process_body(self, body, symbol_map, definitions, successors, targets): + def process_body(self, body, symbol_map, definitions, successors, targets, exclude_arrays): # build list of type-bound array access using the horizontal index _vars = [var for var in FindVariables().visit(body) @@ -214,7 +214,7 @@ def process_body(self, body, symbol_map, definitions, successors, targets): vmap = recursive_expression_map_update(vmap) # filter out arrays marked for exclusion - vmap = {k: v for k, v in vmap.items() if not any(e in k for e in self.exclude)} + vmap = {k: v for k, v in vmap.items() if not any(e in k for e in exclude_arrays)} # propagate dummy field_api wrapper definitions to children self.propagate_defs_to_children(self._key, definitions, successors) @@ -223,22 +223,19 @@ def process_body(self, body, symbol_map, 
definitions, successors, targets): return SubstituteExpressions(vmap).visit(body) - def process_kernel(self, routine, item, successors, targets): + def process_kernel(self, routine, item, successors, targets, exclude_arrays): # Sanitize the subroutine resolve_associates(routine) v_index = SCCBaseTransformation.get_integer_variable(routine, name=self.horizontal.index) SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) - if self.horizontal.bounds[0] in routine.variables and self.horizontal.bounds[1] in routine.variables: - _bounds = self.horizontal.bounds - else: - _bounds = self.horizontal._bounds_aliases - SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=_bounds) + bounds = SCCBaseTransformation.get_horizontal_loop_bounds(routine, self.horizontal) + SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=bounds) # for kernels we process the entire body routine.body = self.process_body(routine.body, routine.symbol_map, item.trafo_data[self._key]['definitions'], - successors, targets) + successors, targets, exclude_arrays) class BlockIndexInjectTransformation(Transformation): @@ -305,14 +302,14 @@ class BlockIndexInjectTransformation(Transformation): real :: var(jlon,nlev) end subroutine kernel2 + Specific arrays in individual routines can also be marked for exclusion from this transformation by assigning + them to the `exclude_arrays` list in the :any:`SchedulerConfig`. + Parameters ---------- block_dim : :any:`Dimension` :any:`Dimension` object describing the variable conventions used in code to define the blocking data dimension and iteration space. - exclude : tuple - List of data structures to be intentionally excluded from this transformation. This list is intended - primarily for data structures that are not memory-blocked. 
key : str, optional Specify a different identifier under which trafo_data is stored """ @@ -323,9 +320,8 @@ class BlockIndexInjectTransformation(Transformation): # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, block_dim, exclude=(), key=None): + def __init__(self, block_dim, key=None): self.block_dim = block_dim - self.exclude = exclude if key: self._key = key @@ -334,8 +330,12 @@ def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets', None))) + exclude_arrays = [] + if (item := kwargs.get('item', None)): + exclude_arrays = item.config.get('exclude_arrays', []) + if role == 'kernel': - self.process_kernel(routine, targets) + self.process_kernel(routine, targets, exclude_arrays) #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer @@ -379,7 +379,7 @@ def get_block_index(self, routine): return block_index - def process_body(self, body, block_index, targets): + def process_body(self, body, block_index, targets, exclude_arrays): # The logic for callstatement args differs from other variables in the body, # so we build a list to filter call_args = [a for call in FindNodes(ir.CallStatement).visit(body) for a in call.arguments] @@ -408,16 +408,16 @@ def process_body(self, body, block_index, targets): vmap.update(self._update_expr_map(var, decl_rank, block_index)) # filter out arrays marked for exclusion - vmap = {k: v for k, v in vmap.items() if not any(e in k for e in self.exclude)} + vmap = {k: v for k, v in vmap.items() if not any(e in k for e in exclude_arrays)} # finally we perform the substitution return SubstituteExpressions(vmap).visit(body) - def process_kernel(self, routine, targets): + def process_kernel(self, routine, targets, exclude_arrays): # we skip routines that do not contain the block index or any known alias if not (block_index := self.get_block_index(routine)): return # for 
kernels we process the entire subroutine body - routine.body = self.process_body(routine.body, block_index, targets) + routine.body = self.process_body(routine.body, block_index, targets, exclude_arrays) From 5ce3b84aed8815bb815c074a56648f165e926d08 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 24 Apr 2024 09:42:03 +0200 Subject: [PATCH 17/37] BlockViewtoFieldViewTrafo: make switch to global gfl_ptr optional --- .../block_index_transformations.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 5bc8a3c8b..3c3814bfd 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -8,7 +8,7 @@ from loki import ( Transformation, ProcedureItem, ir, Module, as_tuple, SymbolAttributes, BasicType, Variable, RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes, - resolve_typebound_var, recursive_expression_map_update + recursive_expression_map_update ) from transformations.single_column_coalesced import SCCBaseTransformation @@ -47,6 +47,8 @@ class BlockViewToFieldViewTransformation(Transformation): horizontal : :any:`Dimension` :any:`Dimension` object describing the variable conventions used in code to define the horizontal data dimension and iteration space. + global_gfl_ptr: bool + Toggle whether thread-local gfl_ptr should be replaced with global. 
key : str, optional Specify a different identifier under which trafo_data is stored """ @@ -56,8 +58,9 @@ class BlockViewToFieldViewTransformation(Transformation): item_filter = (ProcedureItem,) - def __init__(self, horizontal, key=None): + def __init__(self, horizontal, global_gfl_ptr=False, key=None): self.horizontal = horizontal + self.global_gfl_ptr = global_gfl_ptr if key: self._key = key @@ -209,9 +212,10 @@ def process_body(self, body, symbol_map, definitions, successors, targets, exclu for var in _vars} # replace thread-private GFL_PTR with global - vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) - for v in FindVariables().visit(body) if 'ydvars%gfl_ptr' in v.name.lower()}) - vmap = recursive_expression_map_update(vmap) + if self.global_gfl_ptr: + vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) + for v in FindVariables().visit(body) if 'ydvars%gfl_ptr' in v.name.lower()}) + vmap = recursive_expression_map_update(vmap) # filter out arrays marked for exclusion vmap = {k: v for k, v in vmap.items() if not any(e in k for e in exclude_arrays)} @@ -249,8 +253,8 @@ class BlockIndexInjectTransformation(Transformation): to the array's dimensions. For :any:`CallStatement` arguments, if the rank of the argument is one less than that of the corresponding - dummy-argument, the block-index is appended to the argument's dimensions. It should be noted that this logic relies on - the :any:`CallStatement` being free of any sequence-association. + dummy-argument, the block-index is appended to the argument's dimensions. It should be noted that this logic relies + on the :any:`CallStatement` being free of any sequence-association. 
For example, the following code: @@ -375,7 +379,7 @@ def get_block_index(self, routine): index_name = [alias for alias in self.block_dim._index_aliases if alias.rsplit('%')[0] in variable_map][0] - block_index = resolve_typebound_var(index_name, variable_map) + block_index = routine.resolve_typebound_var(index_name, variable_map) return block_index From a75045010245d7ba5230fede7912ba3ba0c2e17e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 24 Apr 2024 11:07:07 +0200 Subject: [PATCH 18/37] BlockViewtoFieldViewTrafo: add bailout for routines marked as seq --- loki/transformations/block_index_transformations.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 3c3814bfd..efe2c89aa 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -234,6 +234,10 @@ def process_kernel(self, routine, item, successors, targets, exclude_arrays): v_index = SCCBaseTransformation.get_integer_variable(routine, name=self.horizontal.index) SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) + # Bail if routine is marked as sequential or routine has already been processed + if SCCBaseTransformation.check_routine_pragmas(routine, directive=None): + return + bounds = SCCBaseTransformation.get_horizontal_loop_bounds(routine, self.horizontal) SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=bounds) From 1d474ea2370b1631e399bd496f741dcafba12f09 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 24 Apr 2024 15:23:30 +0200 Subject: [PATCH 19/37] Dimension: add index_expressions property --- loki/dimension.py | 12 ++++++++++++ loki/transformations/block_index_transformations.py | 11 ++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/loki/dimension.py b/loki/dimension.py index fc1386267..a7611b403 100644 --- 
a/loki/dimension.py +++ b/loki/dimension.py @@ -122,3 +122,15 @@ def bounds_expressions(self): exprs = [expr + (b,) for expr, b in zip(exprs, self._bounds_aliases)] return as_tuple(exprs) + + @property + def index_expressions(self): + """ + A list of all expression strings representing the index expression of an iteration space (loop). + """ + + exprs = [self.index,] + if self._index_aliases: + exprs += [alias for alias in self._index_aliases] + + return as_tuple(exprs) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index efe2c89aa..f28680e45 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -379,13 +379,10 @@ def get_block_index(self, routine): variable_map = routine.variable_map if (block_index := variable_map.get(self.block_dim.index, None)): return block_index - if any(i.rsplit('%')[0] in variable_map for i in self.block_dim._index_aliases): - index_name = [alias for alias in self.block_dim._index_aliases - if alias.rsplit('%')[0] in variable_map][0] - - block_index = routine.resolve_typebound_var(index_name, variable_map) - - return block_index + if (block_index := [i for i in self.block_dim.index_expressions + if i.split('%', maxsplit=1)[0] in variable_map]): + return routine.resolve_typebound_var(block_index[0], variable_map) + return None def process_body(self, body, block_index, targets, exclude_arrays): # The logic for callstatement args differs from other variables in the body, From 964002b939e5d5b1c5a1dd8c6e6bc4b3dd950c2e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 24 Apr 2024 15:23:48 +0200 Subject: [PATCH 20/37] BlockIndexInjectTrafos: add tests --- .../block_index_transformations.py | 12 +- .../tests/test_block_index_inject.py | 327 ++++++++++++++++++ 2 files changed, 333 insertions(+), 6 deletions(-) create mode 100644 loki/transformations/tests/test_block_index_inject.py diff --git 
a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index f28680e45..d1bee9596 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -68,10 +68,11 @@ def __init__(self, horizontal, global_gfl_ptr=False, key=None): def get_parent_typedef(var, symbol_map): """Utility method to retrieve derived-tyoe definition of parent type.""" - if not var.parent.type.dtype.typedef == BasicType.DEFERRED: + if not var.parent.type.dtype.typedef is BasicType.DEFERRED: return var.parent.type.dtype.typedef - if not symbol_map[var.parent.type.dtype.name].type.dtype.typedef == BasicType.DEFERRED: - return symbol_map[var.parent.type.dtype.name].type.dtype.typedef + if (_parent_type := symbol_map.get(var.parent.type.dtype.name, None)): + if not _parent_type.type.dtype.typedef is BasicType.DEFERRED: + return _parent_type.type.dtype.typedef raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') def transform_subroutine(self, routine, **kwargs): @@ -201,9 +202,8 @@ def process_body(self, body, symbol_map, definitions, successors, targets, exclu # build list of type-bound view pointers passed as subroutine arguments for call in [call for call in FindNodes(ir.CallStatement).visit(body) if call.name in targets]: _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array)} - _args = {a: d for a, d in _args.items() - if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents} - _vars += list(_args) + _vars += [a for a, d in _args.items() + if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents] # replace per-block view pointers with full field pointers vmap = {var: diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py new file mode 100644 index 000000000..547ae9e18 --- /dev/null +++ 
b/loki/transformations/tests/test_block_index_inject.py @@ -0,0 +1,327 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from shutil import rmtree +import pytest + +from loki import ( + Dimension, gettempdir, Scheduler, OMNI, FindNodes, Assignment, FindVariables, CallStatement, Subroutine, + Item +) +from conftest import available_frontends +from transformations import BlockViewToFieldViewTransformation, BlockIndexInjectTransformation + +@pytest.fixture(scope='module', name='horizontal') +def fixture_horizontal(): + return Dimension(name='horizontal', size='nlon', index='jl', bounds=('start', 'end'), + aliases=('nproma',), bounds_aliases=('bnds%start', 'bnds%end')) + + +@pytest.fixture(scope='module', name='blocking') +def fixture_blocking(): + return Dimension(name='blocking', size='nb', index='ibl', index_aliases='bnds%kbl') + + +@pytest.fixture(scope='module', name='config') +def fixture_config(): + """ + Default configuration dict with basic options. 
+ """ + return { + 'default': { + 'mode': 'idem', + 'role': 'kernel', + 'expand': True, + 'strict': True, + 'enable_imports': True, + 'disable': ['*%init', '*%final'] + }, + } + + +@pytest.fixture(scope='module', name='blockview_to_fieldview_code', params=[True, False]) +def fixture_blockview_to_fieldview_code(request): + fcode = { + #------------- + 'variable_mod': ( + #------------- +""" +module variable_mod + implicit none + + type variable_3d + real, pointer :: p(:,:) => null() + real, pointer :: p_field(:,:,:) => null() + end type variable_3d + + type variable_3d_ptr + integer :: comp + type(variable_3d), pointer :: ptr => null() + end type variable_3d_ptr + +end module variable_mod +""" + ).strip(), + #-------------------- + 'field_variables_mod': ( + #-------------------- +""" +module field_variables_mod + use variable_mod, only: variable_3d, variable_3d_ptr + implicit none + + type field_variables + type(variable_3d_ptr), allocatable :: gfl_ptr_g(:) + type(variable_3d_ptr), pointer :: gfl_ptr(:) => null() + type(variable_3d) :: var + end type field_variables + +end module field_variables_mod +""" + ).strip(), + #------------------- + 'container_type_mod': ( + #------------------- +""" +module container_type_mod + implicit none + + type container_3d_var + real, pointer :: p(:,:) => null() + real, pointer :: p_field(:,:,:) => null() + end type container_3d_var + + type container_type + type(container_3d_var), allocatable :: vars(:) + end type container_type + +end module container_type_mod +""" + ).strip(), + #-------------- + 'dims_type_mod': ( + #-------------- +""" +module dims_type_mod + type dims_type + integer :: start, end, kbl, nb + end type dims_type +end module dims_type_mod +""" + ).strip(), + #------- + 'driver': ( + #------- +f""" +subroutine driver(data, ydvars, container, nlon, nlev, {'start, end, nb' if request.param else 'bnds'}) + use field_array_module, only: field_3rb_array + use container_type_mod, only: container_type + use 
field_variables_mod, only: field_variables + {'use dims_type_mod, only: dims_type' if not request.param else ''} + implicit none + + #include "kernel.intfb.h" + + real, intent(inout) :: data(:,:,:) + integer, intent(in) :: nlon, nlev + type(field_variables), intent(inout) :: ydvars + type(container_type), intent(inout) :: container + {'integer, intent(in) :: start, end, nb' if request.param else 'type(dims_type), intent(in) :: bnds'} + + integer :: ibl + type(field_3rb_array) :: yla_data + + call yla_data%init(data) + + do ibl=1,{'nb' if request.param else 'bnds%nb'} + {'bnds%kbl = ibl' if not request.param else ''} + call kernel(nlon, nlev, {'start, end, ibl' if request.param else 'bnds'}, ydvars, container, yla_data) + enddo + + call yla_data%final() + +end subroutine driver +""" + ).strip(), + #------- + 'kernel': ( + #------- +f""" +subroutine kernel(nlon, nlev, {'start, end, ibl' if request.param else 'bnds'}, ydvars, container, yla_data) + use field_array_module, only: field_3rb_array + use container_type_mod, only: container_type + use field_variables_mod, only: field_variables + {'use dims_type_mod, only: dims_type' if not request.param else ''} + implicit none + + #include "another_kernel.intfb.h" + + integer, intent(in) :: nlon, nlev + type(field_variables), intent(inout) :: ydvars + type(container_type), intent(inout) :: container + {'integer, intent(in) :: start, end, ibl' if request.param else 'type(dims_type), intent(in) :: bnds'} + type(field_3rb_array), intent(inout) :: yda_data + + integer :: jl, jfld + {'associate(start=>bnds%start, end=>bnds%end, ibl=>bnds%kbl)' if not request.param else ''} + + ydvars%var%p_field(:,:) = 0. !... this should only get the block-index + ydvars%var%p_field(:,:,ibl) = 0. !... 
this should be untouched + + yda_data%p(start:end,:) = 1 + ydvars%var%p(start:end,:) = 1 + + do jfld=1,size(ydvars%gfl_ptr) + do jl=start,end + ydvars%gfl_ptr(jfld)%ptr%p(jl,:) = yda_data%p(jl,:) + container%vars(ydvars%gfl_ptr(jfld)%comp)%p(jl,:) = 0. + enddo + enddo + + call another_kernel(start, end, nlon, nlev, yda_data%p) + + {'end associate' if not request.param else ''} +end subroutine kernel +""" + ).strip(), + #------- + 'another_kernel': ( + #------- +""" +subroutine another_kernel(start, end, nproma, nlev, data) + implicit none + integer, intent(in) :: nproma, nlev, start, end + real, intent(inout) :: data(nproma, nlev) +end subroutine another_kernel +""" + ).strip() + } + + workdir = gettempdir()/'test_blockview_to_fieldview' + if workdir.exists(): + rmtree(workdir) + workdir.mkdir() + for name, code in fcode.items(): + (workdir/f'{name}.F90').write_text(code) + + yield workdir, request.param + + rmtree(workdir) + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI fails to import undefined module.')])) +def test_blockview_to_fieldview_pipeline(horizontal, blocking, config, frontend, blockview_to_fieldview_code): + + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler( + paths=(blockview_to_fieldview_code[0],), config=config, seed_routines='driver', frontend=frontend + ) + scheduler.process(BlockViewToFieldViewTransformation(horizontal, global_gfl_ptr=True)) + scheduler.process(BlockIndexInjectTransformation(blocking)) + + kernel = scheduler['#kernel'].ir + aliased_bounds = not blockview_to_fieldview_code[1] + ibl_expr = blocking.index + if aliased_bounds: + ibl_expr = blocking.index_expressions[1] + + assigns = FindNodes(Assignment).visit(kernel.body) + + # check that access pointers for arrays without horizontal index in dimensions were not updated + assert assigns[0].lhs == f'ydvars%var%p_field(:,:,{ibl_expr})' + assert assigns[1].lhs == f'ydvars%var%p_field(:,:,{ibl_expr})' + + # check 
that vector notation was resolved correctly + assert assigns[2].lhs == f'yda_data%p_field(jl, :, {ibl_expr})' + assert assigns[3].lhs == f'ydvars%var%p_field(jl, :, {ibl_expr})' + + # check thread-local ydvars%gfl_ptr was replaced with its global equivalent + gfl_ptr_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr' in v.name.lower()} + gfl_ptr_g_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr_g' in v.name.lower()} + assert gfl_ptr_g_vars + assert not gfl_ptr_g_vars - gfl_ptr_vars + + assert assigns[4].lhs == f'ydvars%gfl_ptr_g(jfld)%ptr%p_field(jl,:,{ibl_expr})' + assert assigns[4].rhs == f'yda_data%p_field(jl,:,{ibl_expr})' + assert assigns[5].lhs == f'container%vars(ydvars%gfl_ptr_g(jfld)%comp)%p_field(jl,:,{ibl_expr})' + + # check callstatement was updated correctly + call = FindNodes(CallStatement).visit(kernel.body)[0] + assert f'yda_data%p_field(:,:,{ibl_expr})' in call.arguments + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI correctly complains about rank mismatch in assignment.')])) +def test_simple_blockindex_inject(blocking, frontend): + fcode = """ +subroutine kernel(nlon,nlev,nb,var) + implicit none + + interface + subroutine compute(nlon,nlev,var) + implicit none + integer, intent(in) :: nlon,nlev + real, intent(inout) :: var(nlon,nlev) + end subroutine compute + end interface + + integer, intent(in) :: nlon,nlev,nb + real, intent(inout) :: var(nlon,nlev,nb) !... this dummy arg was potentially promoted by a previous transformation + + integer :: ibl + + do ibl=1,nb !... this loop was potentially lowered by a previous transformation + var(:,:) = 0. 
+ call compute(nlon,nlev,var) + enddo + +end subroutine kernel +""" + + kernel = Subroutine.from_source(fcode, frontend=frontend) + BlockIndexInjectTransformation(blocking).apply(kernel, role='kernel', targets=('compute',)) + + assigns = FindNodes(Assignment).visit(kernel.body) + assert assigns[0].lhs == 'var(:,:,ibl)' + + calls = FindNodes(CallStatement).visit(kernel.body) + assert 'var(:,:,ibl)' in calls[0].arguments + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI complains about undefined type.')])) +def test_blockview_to_fieldview_exception(frontend, horizontal): + fcode = """ +subroutine kernel(nlon,nlev,var) + implicit none + + interface + subroutine compute(nlon,nlev,var) + implicit none + integer, intent(in) :: nlon,nlev + real, intent(inout) :: var(nlon,nlev) + end subroutine compute + end interface + + integer, intent(in) :: nlon,nlev + type(wrapped_field) :: var + + call compute(nlon,nlev,var%p) + +end subroutine kernel +""" + + kernel = Subroutine.from_source(fcode, frontend=frontend) + item = Item(name='#kernel', source=kernel) + item.trafo_data['foobar'] = {'definitions': []} + with pytest.raises(RuntimeError): + BlockViewToFieldViewTransformation(horizontal, key='foobar').apply(kernel, item=item, role='kernel', + targets=('compute',)) \ No newline at end of file From b58be6dcdec8b33124a1fbd934d518a68c8d0815 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 29 Apr 2024 11:32:42 +0200 Subject: [PATCH 21/37] Appease pylint --- loki/dimension.py | 2 +- loki/transformations/tests/test_block_index_inject.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loki/dimension.py b/loki/dimension.py index a7611b403..48103b496 100644 --- a/loki/dimension.py +++ b/loki/dimension.py @@ -131,6 +131,6 @@ def index_expressions(self): exprs = [self.index,] if self._index_aliases: - exprs += [alias for alias in self._index_aliases] + exprs += list(self._index_aliases) return as_tuple(exprs) diff --git 
a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index 547ae9e18..af022ab59 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -324,4 +324,4 @@ def test_blockview_to_fieldview_exception(frontend, horizontal): item.trafo_data['foobar'] = {'definitions': []} with pytest.raises(RuntimeError): BlockViewToFieldViewTransformation(horizontal, key='foobar').apply(kernel, item=item, role='kernel', - targets=('compute',)) \ No newline at end of file + targets=('compute',)) From facaca92a2d1bf91adeb0cf62d958307b0b29c40 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 10 May 2024 12:41:15 +0200 Subject: [PATCH 22/37] BlockIndexInjectTrafo: add support for call statement kwargs --- loki/transformations/block_index_transformations.py | 2 +- loki/transformations/tests/test_block_index_inject.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index d1bee9596..67250df9f 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -387,7 +387,7 @@ def get_block_index(self, routine): def process_body(self, body, block_index, targets, exclude_arrays): # The logic for callstatement args differs from other variables in the body, # so we build a list to filter - call_args = [a for call in FindNodes(ir.CallStatement).visit(body) for a in call.arguments] + call_args = [a for call in FindNodes(ir.CallStatement).visit(body) for a in call.arg_map.values()] # First get rank mismatched call statement args vmap = {} diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index af022ab59..3e73ca704 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ 
b/loki/transformations/tests/test_block_index_inject.py @@ -182,7 +182,7 @@ def fixture_blockview_to_fieldview_code(request): enddo enddo - call another_kernel(start, end, nlon, nlev, yda_data%p) + call another_kernel(start, end, nlon, nlev, data=yda_data%p) {'end associate' if not request.param else ''} end subroutine kernel @@ -255,7 +255,7 @@ def test_blockview_to_fieldview_pipeline(horizontal, blocking, config, frontend, # check callstatement was updated correctly call = FindNodes(CallStatement).visit(kernel.body)[0] - assert f'yda_data%p_field(:,:,{ibl_expr})' in call.arguments + assert f'yda_data%p_field(:,:,{ibl_expr})' in call.arg_map.values() @pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, From 4fc6af19635a66dbf0d5015639c54d86fa222cef Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 10 May 2024 12:51:19 +0200 Subject: [PATCH 23/37] BlockIndexTrafos: fix typos and cover untested lines --- .../block_index_transformations.py | 10 +-- .../tests/test_block_index_inject.py | 82 ++++++++++++++++--- 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 67250df9f..1c2890222 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -66,7 +66,7 @@ def __init__(self, horizontal, global_gfl_ptr=False, key=None): @staticmethod def get_parent_typedef(var, symbol_map): - """Utility method to retrieve derived-tyoe definition of parent type.""" + """Utility method to retrieve derived-type definition of parent type.""" if not var.parent.type.dtype.typedef is BasicType.DEFERRED: return var.parent.type.dtype.typedef @@ -77,8 +77,8 @@ def get_parent_typedef(var, symbol_map): def transform_subroutine(self, routine, **kwargs): - if not (item := kwargs['item']): - raise RuntimeError('Cannot apply DeprivatiseStructsTransformation without item to store definitions') + 
if not (item := kwargs.get('item', None)): + raise RuntimeError('Cannot apply BlockViewToFieldViewTransformation without item to store definitions') successors = kwargs.get('successors', ()) role = kwargs['role'] @@ -197,7 +197,7 @@ def process_body(self, body, symbol_map, definitions, successors, targets, exclu # build list of type-bound array access using the horizontal index _vars = [var for var in FindVariables().visit(body) - if isinstance(var, Array) and var.parents and self.horizontal.index in getattr(var, 'dimensions', ())] + if isinstance(var, Array) and var.parents and self.horizontal.index in var.dimensions] # build list of type-bound view pointers passed as subroutine arguments for call in [call for call in FindNodes(ir.CallStatement).visit(body) if call.name in targets]: @@ -361,7 +361,7 @@ def _update_expr_map(var, rank, index): @staticmethod def get_call_arg_rank(arg): """ - Utility to retrieve the local rank of a :any:`CallSatement` argument. + Utility to retrieve the local rank of a :any:`CallStatement` argument. """ rank = len(arg.shape) if getattr(arg, 'shape', None) else 0 diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index 3e73ca704..816b89152 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -182,7 +182,7 @@ def fixture_blockview_to_fieldview_code(request): enddo enddo - call another_kernel(start, end, nlon, nlev, data=yda_data%p) + call another_kernel(nlon, nlev, data=yda_data%p) {'end associate' if not request.param else ''} end subroutine kernel @@ -192,9 +192,12 @@ def fixture_blockview_to_fieldview_code(request): 'another_kernel': ( #------- """ -subroutine another_kernel(start, end, nproma, nlev, data) +subroutine another_kernel(nproma, nlev, data) implicit none - integer, intent(in) :: nproma, nlev, start, end + !... 
not a sequential routine but still labelling it as one to test the + !... bail-out mechanism + !$loki routine seq + integer, intent(in) :: nproma, nlev real, intent(inout) :: data(nproma, nlev) end subroutine another_kernel """ @@ -258,6 +261,59 @@ def test_blockview_to_fieldview_pipeline(horizontal, blocking, config, frontend, assert f'yda_data%p_field(:,:,{ibl_expr})' in call.arg_map.values() +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI fails to import undefined module.')])) +@pytest.mark.parametrize('global_gfl_ptr', [False, True]) +def test_blockview_to_fieldview_only(horizontal, blocking, config, frontend, blockview_to_fieldview_code, + global_gfl_ptr): + + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler( + paths=(blockview_to_fieldview_code[0],), config=config, seed_routines='driver', frontend=frontend + ) + scheduler.process(BlockViewToFieldViewTransformation(horizontal, global_gfl_ptr=global_gfl_ptr)) + + kernel = scheduler['#kernel'].ir + aliased_bounds = not blockview_to_fieldview_code[1] + ibl_expr = blocking.index + if aliased_bounds: + ibl_expr = blocking.index_expressions[1] + + assigns = FindNodes(Assignment).visit(kernel.body) + + # check that access pointers for arrays without horizontal index in dimensions were not updated + assert assigns[0].lhs == 'ydvars%var%p_field(:,:)' + assert assigns[1].lhs == f'ydvars%var%p_field(:,:,{ibl_expr})' + + # check that vector notation was resolved correctly + assert assigns[2].lhs == 'yda_data%p_field(jl, :)' + assert assigns[3].lhs == 'ydvars%var%p_field(jl, :)' + + # check thread-local ydvars%gfl_ptr was replaced with its global equivalent + if global_gfl_ptr: + gfl_ptr_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr' in v.name.lower()} + gfl_ptr_g_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr_g' in v.name.lower()} + assert gfl_ptr_g_vars + assert not gfl_ptr_g_vars - gfl_ptr_vars + else: 
+ assert not {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr_g' in v.name.lower()} + + assert assigns[4].rhs == 'yda_data%p_field(jl,:)' + if global_gfl_ptr: + assert assigns[4].lhs == 'ydvars%gfl_ptr_g(jfld)%ptr%p_field(jl,:)' + assert assigns[5].lhs == 'container%vars(ydvars%gfl_ptr_g(jfld)%comp)%p_field(jl,:)' + else: + assert assigns[4].lhs == 'ydvars%gfl_ptr(jfld)%ptr%p_field(jl,:)' + assert assigns[5].lhs == 'container%vars(ydvars%gfl_ptr(jfld)%comp)%p_field(jl,:)' + + # check callstatement was updated correctly + call = FindNodes(CallStatement).visit(kernel.body)[0] + assert 'yda_data%p_field' in call.arg_map.values() + + @pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, 'OMNI correctly complains about rank mismatch in assignment.')])) def test_simple_blockindex_inject(blocking, frontend): @@ -274,13 +330,13 @@ def test_simple_blockindex_inject(blocking, frontend): end interface integer, intent(in) :: nlon,nlev,nb - real, intent(inout) :: var(nlon,nlev,nb) !... this dummy arg was potentially promoted by a previous transformation + real, intent(inout) :: var(nlon,nlev,4,nb) !... this dummy arg was potentially promoted by a previous transformation integer :: ibl do ibl=1,nb !... this loop was potentially lowered by a previous transformation - var(:,:) = 0. - call compute(nlon,nlev,var) + var(:,:,:) = 0. 
+ call compute(nlon,nlev,var(:,:,1)) enddo end subroutine kernel @@ -290,17 +346,17 @@ def test_simple_blockindex_inject(blocking, frontend): BlockIndexInjectTransformation(blocking).apply(kernel, role='kernel', targets=('compute',)) assigns = FindNodes(Assignment).visit(kernel.body) - assert assigns[0].lhs == 'var(:,:,ibl)' + assert assigns[0].lhs == 'var(:,:,:,ibl)' calls = FindNodes(CallStatement).visit(kernel.body) - assert 'var(:,:,ibl)' in calls[0].arguments + assert 'var(:,:,1,ibl)' in calls[0].arguments @pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, 'OMNI complains about undefined type.')])) def test_blockview_to_fieldview_exception(frontend, horizontal): fcode = """ -subroutine kernel(nlon,nlev,var) +subroutine kernel(nlon,nlev,start,end,var) implicit none interface @@ -311,7 +367,7 @@ def test_blockview_to_fieldview_exception(frontend, horizontal): end subroutine compute end interface - integer, intent(in) :: nlon,nlev + integer, intent(in) :: nlon,nlev,start,end type(wrapped_field) :: var call compute(nlon,nlev,var%p) @@ -324,4 +380,8 @@ def test_blockview_to_fieldview_exception(frontend, horizontal): item.trafo_data['foobar'] = {'definitions': []} with pytest.raises(RuntimeError): BlockViewToFieldViewTransformation(horizontal, key='foobar').apply(kernel, item=item, role='kernel', - targets=('compute',)) + targets=('compute',)) + + with pytest.raises(RuntimeError): + BlockViewToFieldViewTransformation(horizontal, key='foobar').apply(kernel, role='kernel', + targets=('compute',)) From 729120c39378ac9b72baf82267d8447c78cd7dc5 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 10 May 2024 14:13:18 +0200 Subject: [PATCH 24/37] Rebase cleanup --- loki/transformations/__init__.py | 2 +- .../block_index_transformations.py | 17 +++++++++-------- .../tests/test_block_index_inject.py | 5 ++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 
bfe6e25fb..46fced6f9 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -30,4 +30,4 @@ from loki.transformations.transform_region import * # noqa from loki.transformations.pool_allocator import * # noqa from loki.transformations.utilities import * # noqa -from transformations.block_index_transformations import * # noqa +from loki.transformations.block_index_transformations import * # noqa diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 1c2890222..60b8874da 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -5,13 +5,14 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -from loki import ( - Transformation, ProcedureItem, ir, Module, as_tuple, SymbolAttributes, BasicType, Variable, - RangeIndex, Array, FindVariables, resolve_associates, SubstituteExpressions, FindNodes, - recursive_expression_map_update -) - -from transformations.single_column_coalesced import SCCBaseTransformation +from loki.batch import Transformation, ProcedureItem +from loki.ir import nodes as ir, FindNodes +from loki.module import Module +from loki.tools import as_tuple +from loki.types import SymbolAttributes, BasicType +from loki.expression import Variable, Array, RangeIndex, FindVariables, SubstituteExpressions +from loki.transformations import resolve_associates, recursive_expression_map_update +from loki.transformations.single_column import SCCBaseTransformation __all__ = ['BlockViewToFieldViewTransformation', 'BlockIndexInjectTransformation'] @@ -133,7 +134,7 @@ def _build_field_array_types(self, field_array_module, wrapper_types): decls = (ir.VariableDeclaration(symbols=(pointer_var,)),) decls += (ir.VariableDeclaration(symbols=(contig_pointer_var,)),) - typedefs += (ir.TypeDef(name=_type, body=decls, parent=field_array_module),) + 
typedefs += (ir.TypeDef(name=_type, body=decls, parent=field_array_module),) # pylint: disable=unexpected-keyword-arg return typedefs diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index 816b89152..71e9f758c 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -10,10 +10,9 @@ from loki import ( Dimension, gettempdir, Scheduler, OMNI, FindNodes, Assignment, FindVariables, CallStatement, Subroutine, - Item + Item, available_frontends ) -from conftest import available_frontends -from transformations import BlockViewToFieldViewTransformation, BlockIndexInjectTransformation +from loki.transformations import BlockViewToFieldViewTransformation, BlockIndexInjectTransformation @pytest.fixture(scope='module', name='horizontal') def fixture_horizontal(): From 9a8df04baa3b67263f8c874b13a8f86d43a79e95 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 10 May 2024 15:50:04 +0200 Subject: [PATCH 25/37] InjectBlockIndexTransformation: rename trafo --- loki/transformations/block_index_transformations.py | 8 ++++---- loki/transformations/tests/test_block_index_inject.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 60b8874da..694fc264d 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -14,14 +14,14 @@ from loki.transformations import resolve_associates, recursive_expression_map_update from loki.transformations.single_column import SCCBaseTransformation -__all__ = ['BlockViewToFieldViewTransformation', 'BlockIndexInjectTransformation'] +__all__ = ['BlockViewToFieldViewTransformation', 'InjectBlockIndexTransformation'] class BlockViewToFieldViewTransformation(Transformation): """ A very IFS-specific transformation to 
replace per-block, i.e. per OpenMP-thread, view pointers with per-field view pointers. It should be noted that this transformation only replaces the view pointers but does not actually insert the block index into the promoted view pointers. Therefore this transformation must always be followed by - the :any:`BlockIndexInjectTransformation`. + the :any:`InjectBlockIndexTransformation`. For example, the following code: @@ -247,7 +247,7 @@ def process_kernel(self, routine, item, successors, targets, exclude_arrays): successors, targets, exclude_arrays) -class BlockIndexInjectTransformation(Transformation): +class InjectBlockIndexTransformation(Transformation): """ A transformation pass to inject the block-index in arrays promoted by a previous transformation pass. As such, this transformation also relies on the block-index, or a known alias, being *already* present in routines that @@ -323,7 +323,7 @@ class BlockIndexInjectTransformation(Transformation): Specify a different identifier under which trafo_data is stored """ - _key = 'BlockIndexInjectTransformation' + _key = 'InjectBlockIndexTransformation' """Default identifier for trafo_data entry""" # This trafo only operates on procedures diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index 71e9f758c..86e6644ea 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -12,7 +12,7 @@ Dimension, gettempdir, Scheduler, OMNI, FindNodes, Assignment, FindVariables, CallStatement, Subroutine, Item, available_frontends ) -from loki.transformations import BlockViewToFieldViewTransformation, BlockIndexInjectTransformation +from loki.transformations import BlockViewToFieldViewTransformation, InjectBlockIndexTransformation @pytest.fixture(scope='module', name='horizontal') def fixture_horizontal(): @@ -227,7 +227,7 @@ def test_blockview_to_fieldview_pipeline(horizontal, blocking, 
config, frontend, paths=(blockview_to_fieldview_code[0],), config=config, seed_routines='driver', frontend=frontend ) scheduler.process(BlockViewToFieldViewTransformation(horizontal, global_gfl_ptr=True)) - scheduler.process(BlockIndexInjectTransformation(blocking)) + scheduler.process(InjectBlockIndexTransformation(blocking)) kernel = scheduler['#kernel'].ir aliased_bounds = not blockview_to_fieldview_code[1] @@ -342,7 +342,7 @@ def test_simple_blockindex_inject(blocking, frontend): """ kernel = Subroutine.from_source(fcode, frontend=frontend) - BlockIndexInjectTransformation(blocking).apply(kernel, role='kernel', targets=('compute',)) + InjectBlockIndexTransformation(blocking).apply(kernel, role='kernel', targets=('compute',)) assigns = FindNodes(Assignment).visit(kernel.body) assert assigns[0].lhs == 'var(:,:,:,ibl)' From d31043ab684a8a8cdb39e05d1e521cf6b6b1f1b0 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 10 May 2024 15:54:19 +0200 Subject: [PATCH 26/37] block_index_trafos: remove trafo_data key from constructor args --- loki/transformations/block_index_transformations.py | 13 +++---------- .../tests/test_block_index_inject.py | 6 +++--- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 694fc264d..37d59ac60 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -55,15 +55,13 @@ class BlockViewToFieldViewTransformation(Transformation): """ _key = 'BlockViewToFieldViewTransformation' - """Default identifier for trafo_data entry""" + """Identifier for trafo_data entry""" item_filter = (ProcedureItem,) - def __init__(self, horizontal, global_gfl_ptr=False, key=None): + def __init__(self, horizontal, global_gfl_ptr=False): self.horizontal = horizontal self.global_gfl_ptr = global_gfl_ptr - if key: - self._key = key @staticmethod def get_parent_typedef(var, 
symbol_map): @@ -323,16 +321,11 @@ class InjectBlockIndexTransformation(Transformation): Specify a different identifier under which trafo_data is stored """ - _key = 'InjectBlockIndexTransformation' - """Default identifier for trafo_data entry""" - # This trafo only operates on procedures item_filter = (ProcedureItem,) - def __init__(self, block_dim, key=None): + def __init__(self, block_dim): self.block_dim = block_dim - if key: - self._key = key def transform_subroutine(self, routine, **kwargs): diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index 86e6644ea..289aceb27 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -376,11 +376,11 @@ def test_blockview_to_fieldview_exception(frontend, horizontal): kernel = Subroutine.from_source(fcode, frontend=frontend) item = Item(name='#kernel', source=kernel) - item.trafo_data['foobar'] = {'definitions': []} + item.trafo_data['BlockViewToFieldViewTransformation'] = {'definitions': []} with pytest.raises(RuntimeError): - BlockViewToFieldViewTransformation(horizontal, key='foobar').apply(kernel, item=item, role='kernel', + BlockViewToFieldViewTransformation(horizontal).apply(kernel, item=item, role='kernel', targets=('compute',)) with pytest.raises(RuntimeError): - BlockViewToFieldViewTransformation(horizontal, key='foobar').apply(kernel, role='kernel', + BlockViewToFieldViewTransformation(horizontal).apply(kernel, role='kernel', targets=('compute',)) From 7e673213f892cd5257d9ac528d0c50d75800959e Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sat, 11 May 2024 18:07:40 +0200 Subject: [PATCH 27/37] Enrich derived-type variable declarations --- loki/program_unit.py | 21 ++++++++++++++++++- .../block_index_transformations.py | 16 ++------------ .../tests/test_block_index_inject.py | 2 +- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/loki/program_unit.py 
b/loki/program_unit.py index f73063020..4ad0d810b 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -6,8 +6,9 @@ # nor does it submit to any jurisdiction. from abc import abstractmethod +from collections import defaultdict -from loki.expression import Variable +from loki.expression import Variable, SubstituteExpressions from loki.frontend import ( Frontend, parse_omni_source, parse_ofp_source, parse_fparser_source, RegexParserClass, preprocess_cpp, sanitize_input @@ -327,6 +328,9 @@ def enrich(self, definitions, recurse=False): """ definitions_map = CaseInsensitiveDict((r.name, r) for r in as_tuple(definitions)) + decls = FindNodes(ir.VariableDeclaration).visit(self.spec) + decl_map = defaultdict(dict) + for imprt in self.imports: if not (module := definitions_map.get(imprt.module)): # Skip modules that are not available in the definitions list @@ -361,6 +365,13 @@ def enrich(self, definitions, recurse=False): updated_symbol_attrs[local_name] = symbol.type.clone( dtype=remote_node.dtype, imported=True, module=module ) + + # Update the DataType (type.dtype) of relevant derived type variables + for decl in decls: + for sym in decl.symbols: + if sym.type.dtype.name.lower() == remote_node.dtype.name.lower(): + decl_map[decl].update({sym: sym.clone(type=sym.type.clone(dtype=remote_node.dtype))}) + elif hasattr(remote_node, 'type'): # This is a global variable or interface import updated_symbol_attrs[local_name] = remote_node.type.clone( @@ -375,6 +386,14 @@ def enrich(self, definitions, recurse=False): # expression nodes imprt._update(symbols=tuple(symbol.clone() for symbol in imprt.symbols)) + if decl_map: + # DataType's are not stored directly in the SymbolTable, thus updating the + # imported symbols is not enough to update the type.dtype of expression nodes. 
+ # Therefore we must also update the variable declaration with the updated symbol + for decl, symbol_map in decl_map.items(): + _symbols = SubstituteExpressions(symbol_map).visit(decl.symbols) + decl._update(symbols=as_tuple(_symbols)) + # Update any symbol table entries that have been inherited from the parent if self.parent: updated_symbol_attrs = {} diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 37d59ac60..31aee75e4 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -63,17 +63,6 @@ def __init__(self, horizontal, global_gfl_ptr=False): self.horizontal = horizontal self.global_gfl_ptr = global_gfl_ptr - @staticmethod - def get_parent_typedef(var, symbol_map): - """Utility method to retrieve derived-type definition of parent type.""" - - if not var.parent.type.dtype.typedef is BasicType.DEFERRED: - return var.parent.type.dtype.typedef - if (_parent_type := symbol_map.get(var.parent.type.dtype.name, None)): - if not _parent_type.type.dtype.typedef is BasicType.DEFERRED: - return _parent_type.type.dtype.typedef - raise RuntimeError(f'Container data-type {var.parent.type.dtype.name} not enriched') - def transform_subroutine(self, routine, **kwargs): if not (item := kwargs.get('item', None)): @@ -205,9 +194,8 @@ def process_body(self, body, symbol_map, definitions, successors, targets, exclu if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents] # replace per-block view pointers with full field pointers - vmap = {var: - var.clone(name=var.name_parts[-1] + '_FIELD', - type=self.get_parent_typedef(var, symbol_map).variable_map[var.name_parts[-1] + '_FIELD'].type) + vmap = {var: var.clone(name=var.name_parts[-1] + '_FIELD', + type=var.parent.variable_map[var.name_parts[-1] + '_FIELD'].type) for var in _vars} # replace thread-private GFL_PTR with global diff --git 
a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index 289aceb27..a7fb5c880 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -377,7 +377,7 @@ def test_blockview_to_fieldview_exception(frontend, horizontal): kernel = Subroutine.from_source(fcode, frontend=frontend) item = Item(name='#kernel', source=kernel) item.trafo_data['BlockViewToFieldViewTransformation'] = {'definitions': []} - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): BlockViewToFieldViewTransformation(horizontal).apply(kernel, item=item, role='kernel', targets=('compute',)) From 07366c021005f3ad8ea5ab1e8fa839764562bee9 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sat, 11 May 2024 18:22:52 +0200 Subject: [PATCH 28/37] BlockViewToFieldViewTrafo: update documentation --- .../block_index_transformations.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 31aee75e4..9f822cff8 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -39,9 +39,21 @@ class BlockViewToFieldViewTransformation(Transformation): mystruct%p_field(jlon,:) = 0. enddo - Where the rank of ``my_struct%p_field`` is one greater than that of ``my_struct%p``. Specific arrays in individual - routines can also be marked for exclusion from this transformation by assigning them to the `exclude_arrays` list - in the :any:`SchedulerConfig`. + As the rank of ``my_struct%p_field`` is one greater than that of ``my_struct%p``, we would need to also apply + the :any:`InjectBlockIndexTransformation` to obtain semantically correct code: + + .. code-block:: fortran + + do jlon=1,nproma + mystruct%p_field(jlon,:,ibl) = 0. 
+ enddo + + Specific arrays in individual routines can also be marked for exclusion from this transformation by assigning + them to the `exclude_arrays` list in the :any:`SchedulerConfig`. + + This transformation also creates minimal definitions of FIELD API wrappers (i.e. FIELD_RANKSUFF_ARRAY) and + uses them to enrich the :any:`DataType` of relevant variable declarations and expression nodes. This is + required because FIELD API can be built independently of library targets Loki would typically operate on. Parameters ---------- From f30b37654a3073574ff74a32f66fb9bfae964bc7 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sat, 11 May 2024 18:36:38 +0200 Subject: [PATCH 29/37] block_index_trafos: misc cleanup --- .../block_index_transformations.py | 65 +++++++++---------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 9f822cff8..e18038fd9 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -187,8 +187,8 @@ def build_ydvars_global_gfl_ptr(self, var): parent = self.build_ydvars_global_gfl_ptr(parent) _type = var.type - if 'gfl_ptr' in var.name.lower().split('%')[-1]: - _type = parent.type.dtype.typedef.variable_map['gfl_ptr_g'].type + if 'gfl_ptr' in var.basename.lower(): + _type = parent.variable_map['gfl_ptr_g'].type return var.clone(name=var.name.upper().replace('GFL_PTR', 'GFL_PTR_G'), parent=parent, type=_type) @@ -200,10 +200,11 @@ def process_body(self, body, symbol_map, definitions, successors, targets, exclu if isinstance(var, Array) and var.parents and self.horizontal.index in var.dimensions] # build list of type-bound view pointers passed as subroutine arguments - for call in [call for call in FindNodes(ir.CallStatement).visit(body) if call.name in targets]: - _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array)} - _vars += [a for a, d in 
_args.items() - if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents] + for call in FindNodes(ir.CallStatement).visit(body): + if call.name in targets: + _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array)} + _vars += [a for a, d in _args.items() + if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents] # replace per-block view pointers with full field pointers vmap = {var: var.clone(name=var.name_parts[-1] + '_FIELD', @@ -341,24 +342,13 @@ def transform_subroutine(self, routine, **kwargs): #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer - @staticmethod - def _update_expr_map(var, rank, index): - """ - Return a map with the block-index appended to the variable's dimensions. - """ - - if getattr(var, 'dimensions', None): - return {var: var.clone(dimensions=var.dimensions + as_tuple(index))} - return {var: - var.clone(dimensions=((RangeIndex(children=(None, None)),) * (rank - 1)) + as_tuple(index))} - @staticmethod def get_call_arg_rank(arg): """ Utility to retrieve the local rank of a :any:`CallStatement` argument. 
""" - rank = len(arg.shape) if getattr(arg, 'shape', None) else 0 + rank = len(getattr(arg, 'shape', ())) if getattr(arg, 'dimensions', None): # We assume here that the callstatement is free of sequence association rank = rank - len([d for d in arg.dimensions if not isinstance(d, RangeIndex)]) @@ -385,26 +375,29 @@ def process_body(self, body, block_index, targets, exclude_arrays): # First get rank mismatched call statement args vmap = {} - for call in [call for call in FindNodes(ir.CallStatement).visit(body) if call.name in targets]: - for dummy, arg in call.arg_map.items(): - arg_rank = self.get_call_arg_rank(arg) - dummy_rank = len(dummy.shape) if getattr(dummy, 'shape', None) else 0 - if arg_rank - 1 == dummy_rank: - vmap.update(self._update_expr_map(arg, arg_rank, block_index)) + for call in FindNodes(ir.CallStatement).visit(body): + if call.name in targets: + for dummy, arg in call.arg_map.items(): + arg_rank = self.get_call_arg_rank(arg) + dummy_rank = len(getattr(dummy, 'shape', ())) + if arg_rank - 1 == dummy_rank: + dimensions = getattr(arg, 'dimensions', None) or ((RangeIndex((None, None)),) * (arg_rank - 1)) + vmap.update({arg: arg.clone(dimensions=dimensions + as_tuple(block_index))}) # Now get the rest of the variables - for var in [var for var in FindVariables().visit(body) - if getattr(var, 'dimensions', None) and not var in call_args]: - - local_rank = len(var.dimensions) - decl_rank = local_rank - # we assume here that all derived-type components we wish to transform - # have been parsed - if getattr(var, 'shape', None): - decl_rank = len(var.shape) - - if local_rank == decl_rank - 1: - vmap.update(self._update_expr_map(var, decl_rank, block_index)) + for var in FindVariables().visit(body): + if getattr(var, 'dimensions', None) and not var in call_args: + + local_rank = len(var.dimensions) + decl_rank = local_rank + # we assume here that all derived-type components we wish to transform + # have been parsed + if getattr(var, 'shape', None): + 
decl_rank = len(var.shape) + + if local_rank == decl_rank - 1: + dimensions = getattr(var, 'dimensions', None) or ((RangeIndex((None, None)),) * (decl_rank - 1)) + vmap.update({var: var.clone(dimensions=dimensions + as_tuple(block_index))}) # filter out arrays marked for exclusion vmap = {k: v for k, v in vmap.items() if not any(e in k for e in exclude_arrays)} From 8787f0db682edd6f4eea60e8b492fba849cb935b Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sat, 11 May 2024 18:41:03 +0200 Subject: [PATCH 30/37] SCRIPTS: remove BLOCKVIEW_TO_FIELDVIEW CLI arg --- cmake/loki_transform.cmake | 6 ------ cmake/loki_transform_helpers.cmake | 4 ---- scripts/loki_transform.py | 10 ++-------- 3 files changed, 2 insertions(+), 18 deletions(-) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index 6083b4730..8cfebc84b 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -48,7 +48,6 @@ function( loki_transform ) set( options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR TRIM_VECTOR_SECTIONS GLOBAL_VAR_OFFLOAD REMOVE_DERIVED_ARGS INLINE_MEMBERS RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE - BLOCKVIEW_TO_FIELDVIEW ) set( oneValueArgs COMMAND MODE DIRECTIVE FRONTEND CONFIG BUILDDIR @@ -227,7 +226,6 @@ function( loki_transform_target ) set( options NO_PLAN_SOURCEDIR COPY_UNMODIFIED CPP CPP_PLAN INLINE_MEMBERS RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE TRIM_VECTOR_SECTIONS GLOBAL_VAR_OFFLOAD - BLOCKVIEW_TO_FIELDVIEW ) set( single_value_args TARGET COMMAND MODE DIRECTIVE FRONTEND CONFIG PLAN ) set( multi_value_args SOURCES HEADERS DEFINITIONS INCLUDES ) @@ -313,10 +311,6 @@ function( loki_transform_target ) list( APPEND _TRANSFORM_OPTIONS GLOBAL_VAR_OFFLOAD ) endif() - if( _PAR_T_BLOCKVIEW_TO_FIELDVIEW ) - list( APPEND _TRANSFORM_OPTIONS BLOCKVIEW_TO_FIELDVIEW ) - endif() - loki_transform( COMMAND ${_PAR_T_COMMAND} OUTPUT ${LOKI_SOURCES_TO_APPEND} diff --git a/cmake/loki_transform_helpers.cmake 
b/cmake/loki_transform_helpers.cmake index a95416478..680ae0e72 100644 --- a/cmake/loki_transform_helpers.cmake +++ b/cmake/loki_transform_helpers.cmake @@ -120,10 +120,6 @@ macro( _loki_transform_parse_options ) list( APPEND _ARGS --derive-argument-array-shape ) endif() - if( _PAR_BLOCKVIEW_TO_FIELDVIEW ) - list( APPEND _ARGS --blockview-to-fieldview ) - endif() - endmacro() ############################################################################## diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 32a0e9438..f9b09d660 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -120,14 +120,12 @@ def cli(debug): help="Recursively derive explicit shape dimension for argument arrays") @click.option('--eliminate-dead-code/--no-eliminate-dead-code', default=True, help='Perform dead code elimination, where unreachable branches are trimmed from the code.') -@click.option('--blockview-to-fieldview', is_flag=True, default=False, - help='Replace per-block view pointers with per-field view pointers.') def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, global_var_offload, remove_derived_args, inline_members, inline_marked, resolve_sequence_association, resolve_sequence_association_inlined_calls, - derive_argument_array_shape, eliminate_dead_code, blockview_to_fieldview + derive_argument_array_shape, eliminate_dead_code ): """ Batch-processing mode for Fortran-to-Fortran transformations that @@ -222,17 +220,13 @@ def convert( ) scheduler.process(transformation=sanitise_trafo) - if blockview_to_fieldview: - assert config.pipelines['blockview_to_fieldview'] - scheduler.process( config.pipelines['blockview_to_fieldview'] ) - # Perform source-inlining either from CLI arguments or from config inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) if not inline_trafo: inline_trafo = 
InlineTransformation( inline_internals=inline_members, inline_marked=inline_marked, remove_dead_code=eliminate_dead_code, allowed_aliases=horizontal.index, - resolve_sequence_association=resolve_sequence_association_inlined_calls + resolve_sequence_association=resolve_sequence_association_inlined_calls ) scheduler.process(transformation=inline_trafo) From 212677bb38e9acaf8c772a7f8f08760fe1523fef Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sat, 11 May 2024 18:47:23 +0200 Subject: [PATCH 31/37] Angry linting gods --- loki/program_unit.py | 2 +- loki/transformations/block_index_transformations.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/loki/program_unit.py b/loki/program_unit.py index 4ad0d810b..9753a2ba4 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -389,7 +389,7 @@ def enrich(self, definitions, recurse=False): if decl_map: # DataType's are not stored directly in the SymbolTable, thus updating the # imported symbols is not enough to update the type.dtype of expression nodes. 
- # Therefore we must also update the variable declaration with the updated symbol + # Therefore we must also update the variable declaration with the updated symbol for decl, symbol_map in decl_map.items(): _symbols = SubstituteExpressions(symbol_map).visit(decl.symbols) decl._update(symbols=as_tuple(_symbols)) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index e18038fd9..978d04968 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -11,8 +11,9 @@ from loki.tools import as_tuple from loki.types import SymbolAttributes, BasicType from loki.expression import Variable, Array, RangeIndex, FindVariables, SubstituteExpressions -from loki.transformations import resolve_associates, recursive_expression_map_update -from loki.transformations.single_column import SCCBaseTransformation +from loki.transformations.sanitise import resolve_associates +from loki.transformations.utilities import recursive_expression_map_update +from loki.transformations.single_column.base import SCCBaseTransformation __all__ = ['BlockViewToFieldViewTransformation', 'InjectBlockIndexTransformation'] @@ -193,7 +194,7 @@ def build_ydvars_global_gfl_ptr(self, var): return var.clone(name=var.name.upper().replace('GFL_PTR', 'GFL_PTR_G'), parent=parent, type=_type) - def process_body(self, body, symbol_map, definitions, successors, targets, exclude_arrays): + def process_body(self, body, definitions, successors, targets, exclude_arrays): # build list of type-bound array access using the horizontal index _vars = [var for var in FindVariables().visit(body) @@ -242,7 +243,7 @@ def process_kernel(self, routine, item, successors, targets, exclude_arrays): SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=bounds) # for kernels we process the entire body - routine.body = self.process_body(routine.body, routine.symbol_map, 
item.trafo_data[self._key]['definitions'], + routine.body = self.process_body(routine.body, item.trafo_data[self._key]['definitions'], successors, targets, exclude_arrays) @@ -394,7 +395,7 @@ def process_body(self, body, block_index, targets, exclude_arrays): # have been parsed if getattr(var, 'shape', None): decl_rank = len(var.shape) - + if local_rank == decl_rank - 1: dimensions = getattr(var, 'dimensions', None) or ((RangeIndex((None, None)),) * (decl_rank - 1)) vmap.update({var: var.clone(dimensions=dimensions + as_tuple(block_index))}) From e71adc49a676f139259b8f339bd5efbe871a762a Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sun, 12 May 2024 11:29:56 +0200 Subject: [PATCH 32/37] SCRIPTS: sanitize mode string for custom pipelines --- scripts/loki_transform.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index f9b09d660..fc4c86ca6 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -183,6 +183,8 @@ def convert( scheduler.process( config.pipelines[mode] ) + mode = mode.replace('-', '_') # Sanitize mode string + # Write out all modified source files into the build directory file_write_trafo = FileWriteTransformation(builddir=build, mode=mode) scheduler.process(transformation=file_write_trafo) From 1e05c1b2362c8889b6643ff1e6cf506ea11a6cc6 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Sun, 12 May 2024 11:30:50 +0200 Subject: [PATCH 33/37] Change FindVariables searches to non-unique to restore SL functionality --- loki/transformations/block_index_transformations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index 978d04968..812ccd079 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -197,7 +197,7 @@ def build_ydvars_global_gfl_ptr(self, var): def process_body(self, body, 
definitions, successors, targets, exclude_arrays): # build list of type-bound array access using the horizontal index - _vars = [var for var in FindVariables().visit(body) + _vars = [var for var in FindVariables(unique=False).visit(body) if isinstance(var, Array) and var.parents and self.horizontal.index in var.dimensions] # build list of type-bound view pointers passed as subroutine arguments @@ -215,7 +215,7 @@ def process_body(self, body, definitions, successors, targets, exclude_arrays): # replace thread-private GFL_PTR with global if self.global_gfl_ptr: vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) - for v in FindVariables().visit(body) if 'ydvars%gfl_ptr' in v.name.lower()}) + for v in FindVariables(unique=False).visit(body) if 'ydvars%gfl_ptr' in v.name.lower()}) vmap = recursive_expression_map_update(vmap) # filter out arrays marked for exclusion @@ -386,7 +386,7 @@ def process_body(self, body, block_index, targets, exclude_arrays): vmap.update({arg: arg.clone(dimensions=dimensions + as_tuple(block_index))}) # Now get the rest of the variables - for var in FindVariables().visit(body): + for var in FindVariables(unique=False).visit(body): if getattr(var, 'dimensions', None) and not var in call_args: local_rank = len(var.dimensions) From 2c01d697122ef41c7901413796945b0cb70d2023 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 13 May 2024 11:54:40 +0200 Subject: [PATCH 34/37] Add a test for derived type enrichment --- loki/tests/test_subroutine.py | 99 ++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/loki/tests/test_subroutine.py b/loki/tests/test_subroutine.py index d055bcd01..6593c0d0c 100644 --- a/loki/tests/test_subroutine.py +++ b/loki/tests/test_subroutine.py @@ -12,15 +12,16 @@ from loki import ( Sourcefile, Module, Subroutine, FindVariables, FindNodes, Section, - CallStatement, BasicType, Array, Scalar, Variable, + Array, Scalar, Variable, SymbolAttributes, StringLiteral, 
fgen, fexprgen, VariableDeclaration, Transformer, FindTypedSymbols, - ProcedureSymbol, ProcedureType, StatementFunction, - normalize_range_indexing, DeferredTypeSymbol, Assignment, - Interface + ProcedureSymbol, StatementFunction, + normalize_range_indexing, DeferredTypeSymbol ) from loki.build import jit_compile, jit_compile_lib, clean_test from loki.frontend import available_frontends, OFP, OMNI, REGEX +from loki.types import BasicType, DerivedType, ProcedureType +from loki.ir import nodes as ir @pytest.fixture(scope='module', name='here') @@ -767,7 +768,7 @@ def test_routine_call_arrays(header_path, frontend): """ header = Sourcefile.from_file(header_path, frontend=frontend)['header'] routine = Subroutine.from_source(fcode, frontend=frontend, definitions=header) - call = FindNodes(CallStatement).visit(routine.body)[0] + call = FindNodes(ir.CallStatement).visit(routine.body)[0] assert str(call.arguments[0]) == 'x' assert str(call.arguments[1]) == 'y' @@ -797,7 +798,7 @@ def test_call_no_arg(frontend): call abort end subroutine routine_call_no_arg """) - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].arguments == () assert calls[0].kwarguments == () @@ -813,7 +814,7 @@ def test_call_kwargs(frontend): call mpl_init(kprocs=kprocs, cdstring='routine_call_kwargs') end subroutine routine_call_kwargs """) - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].name == 'mpl_init' @@ -838,7 +839,7 @@ def test_call_args_kwargs(frontend): call mpl_send(pbuf, ktag, kdest, cdstring='routine_call_args_kwargs') end subroutine routine_call_args_kwargs """) - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].name == 'mpl_send' assert len(calls[0].arguments) == 3 @@ -1520,7 +1521,7 @@ 
def test_subroutine_stmt_func(here, frontend): routine.name += f'_{frontend!s}' # Make sure the statement function injection doesn't invalidate source - for assignment in FindNodes(Assignment).visit(routine.body): + for assignment in FindNodes(ir.Assignment).visit(routine.body): assert assignment.source is not None # OMNI inlines statement functions, so we can only check correct representation @@ -1958,7 +1959,7 @@ def test_subroutine_clone_contained(frontend): kernels = driver.subroutines def _verify_call_enrichment(driver_, kernels_): - calls = FindNodes(CallStatement).visit(driver_.body) + calls = FindNodes(ir.CallStatement).visit(driver_.body) assert len(calls) == 2 for call in calls: @@ -2048,12 +2049,12 @@ def test_enrich_explicit_interface(frontend): driver.enrich(kernel) # check if call is enriched correctly - calls = FindNodes(CallStatement).visit(driver.body) + calls = FindNodes(ir.CallStatement).visit(driver.body) assert calls[0].routine is kernel # check if the procedure symbol in the interface block has been removed from # driver's symbol table - intfs = FindNodes(Interface).visit(driver.spec) + intfs = FindNodes(ir.Interface).visit(driver.spec) assert not intfs[0].body[0].parent # check that call still points to correct subroutine @@ -2065,6 +2066,64 @@ def test_enrich_explicit_interface(frontend): assert calls[0].routine is kernel +@pytest.mark.parametrize('frontend', available_frontends()) +def test_enrich_derived_types(tmp_path, frontend): + fcode = """ +subroutine enrich_derived_types_routine(yda_array) +use field_array_module, only : field_3rb_array +implicit none +type(field_3rb_array), intent(inout) :: yda_array +yda_array%p = 0. 
+end subroutine enrich_derived_types_routine + """.strip() + + fcode_module = """ +module field_array_module +implicit none +type field_3rb_array + real, pointer :: p(:,:,:) +end type field_3rb_array +end module field_array_module + """.strip() + + module = Module.from_source(fcode_module, frontend=frontend, xmods=[tmp_path]) + routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + # The derived type is a dangling import + field_3rb_symbol = routine.symbol_map['field_3rb_array'] + assert field_3rb_symbol.type.imported + assert field_3rb_symbol.type.module is None + assert field_3rb_symbol.type.dtype is BasicType.DEFERRED + + # The variable type is recognized as a derived type but without enrichment + yda_array = routine.variable_map['yda_array'] + assert isinstance(yda_array.type.dtype, DerivedType) + assert routine.variable_map['yda_array'].type.dtype.typedef is BasicType.DEFERRED + + # The pointer member has no type information + yda_array_p = routine.resolve_typebound_var('yda_array%p') + assert yda_array_p.type.dtype is BasicType.DEFERRED + assert yda_array_p.type.shape is None + + # Pick out the typedef (before enrichment to validate object consistency) + field_3rb_tdef = module['field_3rb_array'] + assert isinstance(field_3rb_tdef, ir.TypeDef) + + # Enrich the routine with module definitions + routine.enrich(module) + + # Ensure the imported type symbol is correctly enriched + assert field_3rb_symbol.type.imported + assert field_3rb_symbol.type.module is module + assert isinstance(field_3rb_symbol.type.dtype, DerivedType) + + # Ensure the information has been propagated to other variables + assert isinstance(yda_array.type.dtype, DerivedType) + assert yda_array.type.dtype.typedef is field_3rb_tdef + assert yda_array_p.type.dtype is BasicType.REAL + assert yda_array_p.type.shape == (':', ':', ':') + + @pytest.mark.parametrize('frontend', available_frontends( xfail=[(OMNI, 'OMNI cannot handle external type defs without source')] )) @@ 
-2099,15 +2158,15 @@ def test_subroutine_deep_clone(frontend): # Replace all assignments with dummy calls map_nodes={} - for assign in FindNodes(Assignment).visit(new_routine.body): - map_nodes[assign] = CallStatement( + for assign in FindNodes(ir.Assignment).visit(new_routine.body): + map_nodes[assign] = ir.CallStatement( name=DeferredTypeSymbol(name='testcall'), arguments=(assign.lhs,), scope=new_routine ) new_routine.body = Transformer(map_nodes).visit(new_routine.body) # Ensure that the original copy of the routine remains unaffected - assert len(FindNodes(Assignment).visit(routine.body)) == 3 - assert len(FindNodes(Assignment).visit(new_routine.body)) == 0 + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 3 + assert len(FindNodes(ir.Assignment).visit(new_routine.body)) == 0 @pytest.mark.parametrize('frontend', available_frontends()) def test_call_args_kwargs_conversion(frontend): @@ -2162,20 +2221,20 @@ def test_call_args_kwargs_conversion(frontend): len_kwargs = (0, 7, 7, 2) # sort kwargs - for i_call, call in enumerate(FindNodes(CallStatement).visit(driver.body)): + for i_call, call in enumerate(FindNodes(ir.CallStatement).visit(driver.body)): assert call.check_kwarguments_order() == kwargs_in_order[i_call] call.sort_kwarguments() # check calls with sorted kwargs - for i_call, call in enumerate(FindNodes(CallStatement).visit(driver.body)): + for i_call, call in enumerate(FindNodes(ir.CallStatement).visit(driver.body)): assert tuple(arg[1].name for arg in call.arg_iter()) == call_args assert len(call.kwarguments) == len_kwargs[i_call] # kwarg to arg conversion - for call in FindNodes(CallStatement).visit(driver.body): + for call in FindNodes(ir.CallStatement).visit(driver.body): call.convert_kwargs_to_args() # check calls with kwargs converted to args - for call in FindNodes(CallStatement).visit(driver.body): + for call in FindNodes(ir.CallStatement).visit(driver.body): assert tuple(arg.name for arg in call.arguments) == call_args assert 
call.kwarguments == () From 998d1f433d0abafaa506c086663204fc364c3359 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 13 May 2024 14:49:30 +0200 Subject: [PATCH 35/37] Fix enrichment of derived type dtypes for local variables --- loki/program_unit.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/loki/program_unit.py b/loki/program_unit.py index 9753a2ba4..b5b312e3a 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -328,9 +328,6 @@ def enrich(self, definitions, recurse=False): """ definitions_map = CaseInsensitiveDict((r.name, r) for r in as_tuple(definitions)) - decls = FindNodes(ir.VariableDeclaration).visit(self.spec) - decl_map = defaultdict(dict) - for imprt in self.imports: if not (module := definitions_map.get(imprt.module)): # Skip modules that are not available in the definitions list @@ -365,13 +362,13 @@ def enrich(self, definitions, recurse=False): updated_symbol_attrs[local_name] = symbol.type.clone( dtype=remote_node.dtype, imported=True, module=module ) - - # Update the DataType (type.dtype) of relevant derived type variables - for decl in decls: - for sym in decl.symbols: - if sym.type.dtype.name.lower() == remote_node.dtype.name.lower(): - decl_map[decl].update({sym: sym.clone(type=sym.type.clone(dtype=remote_node.dtype))}) - + # Update dtype for local variables using this type + variables_with_this_type = { + name: type_.clone(dtype=remote_node.dtype) + for name, type_ in self.symbol_attrs.items() + if getattr(type_.dtype, 'name') == remote_node.dtype.name + } + updated_symbol_attrs.update(variables_with_this_type) elif hasattr(remote_node, 'type'): # This is a global variable or interface import updated_symbol_attrs[local_name] = remote_node.type.clone( @@ -386,14 +383,6 @@ def enrich(self, definitions, recurse=False): # expression nodes imprt._update(symbols=tuple(symbol.clone() for symbol in imprt.symbols)) - if decl_map: - # DataType's are not stored directly in the 
SymbolTable, thus updating the - # imported symbols is not enough to update the type.dtype of expression nodes. - # Therefore we must also update the variable declaration with the updated symbol - for decl, symbol_map in decl_map.items(): - _symbols = SubstituteExpressions(symbol_map).visit(decl.symbols) - decl._update(symbols=as_tuple(_symbols)) - # Update any symbol table entries that have been inherited from the parent if self.parent: updated_symbol_attrs = {} From 9ae7b505f7408d725c589107ff9290a06fb2c30b Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 13 May 2024 17:55:12 +0200 Subject: [PATCH 36/37] block_index_trafos: change scope of test config fixture to function --- loki/program_unit.py | 3 +-- loki/transformations/tests/test_block_index_inject.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/loki/program_unit.py b/loki/program_unit.py index b5b312e3a..b4797834d 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -6,9 +6,8 @@ # nor does it submit to any jurisdiction. from abc import abstractmethod -from collections import defaultdict -from loki.expression import Variable, SubstituteExpressions +from loki.expression import Variable from loki.frontend import ( Frontend, parse_omni_source, parse_ofp_source, parse_fparser_source, RegexParserClass, preprocess_cpp, sanitize_input diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py index a7fb5c880..26ded84ad 100644 --- a/loki/transformations/tests/test_block_index_inject.py +++ b/loki/transformations/tests/test_block_index_inject.py @@ -25,7 +25,7 @@ def fixture_blocking(): return Dimension(name='blocking', size='nb', index='ibl', index_aliases='bnds%kbl') -@pytest.fixture(scope='module', name='config') +@pytest.fixture(scope='function', name='config') def fixture_config(): """ Default configuration dict with basic options. 
From 67070941c80b66565d7d2ea9c6c631bd657f07bf Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 27 May 2024 09:22:34 +0200 Subject: [PATCH 37/37] Appease extra fussy upgraded linter --- loki/backend/maxgen.py | 2 +- loki/frontend/fparser.py | 1 + loki/program_unit.py | 2 +- loki/transformations/pool_allocator.py | 2 +- loki/transformations/single_column/scc_cuf.py | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/loki/backend/maxgen.py b/loki/backend/maxgen.py index 13984d191..2b0f6b9c7 100644 --- a/loki/backend/maxgen.py +++ b/loki/backend/maxgen.py @@ -176,7 +176,7 @@ class extends Kernel { # Class signature if is_manager: - if is_interface: + if is_interface: # pylint: disable=possibly-used-before-assignment header += [self.format_line( 'public interface ', o.name, ' extends ManagerPCIe, ManagerKernel {')] else: diff --git a/loki/frontend/fparser.py b/loki/frontend/fparser.py index ff939ded5..65729ed00 100644 --- a/loki/frontend/fparser.py +++ b/loki/frontend/fparser.py @@ -685,6 +685,7 @@ def visit_Char_Selector(self, o, **kwargs): * some scalar expression for the kind """ length = None + kind = None if o.children[0] is not None: length = self.visit(o.children[0], **kwargs) if o.children[1] is not None: diff --git a/loki/program_unit.py b/loki/program_unit.py index b4797834d..eb2c2650f 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -164,7 +164,7 @@ def from_source(cls, source, definitions=None, preprocess=False, if frontend == Frontend.OFP: ast = parse_ofp_source(source) return cls.from_ofp(ast=ast, raw_source=source, definitions=definitions, - pp_info=pp_info, parent=parent) + pp_info=pp_info, parent=parent) # pylint: disable=possibly-used-before-assignment if frontend == Frontend.FP: ast = parse_fparser_source(source) diff --git a/loki/transformations/pool_allocator.py b/loki/transformations/pool_allocator.py index 83116599b..6916c83c6 100644 --- a/loki/transformations/pool_allocator.py +++ 
b/loki/transformations/pool_allocator.py @@ -570,7 +570,7 @@ def _get_c_sizeof_arg(self, arr): elif arr.type.dtype == BasicType.COMPLEX: param = Cast(name='CMPLX', expression=(IntLiteral(1), IntLiteral(1))) - param.kind = getattr(arr.type, 'kind', None) + param.kind = getattr(arr.type, 'kind', None) # pylint: disable=possibly-used-before-assignment return param diff --git a/loki/transformations/single_column/scc_cuf.py b/loki/transformations/single_column/scc_cuf.py index 14fedd35b..8a54132a9 100644 --- a/loki/transformations/single_column/scc_cuf.py +++ b/loki/transformations/single_column/scc_cuf.py @@ -724,7 +724,7 @@ def transform_subroutine(self, routine, **kwargs): remove_pragmas(routine) single_variable_declaration(routine=routine, group_by_shape=True) - device_subroutine_prefix(routine, depth) + device_subroutine_prefix(routine, depth) # pylint: disable=possibly-used-before-assignment routine.spec.prepend(ir.Import(module="cudafor"))