diff --git a/AUTHORS.md b/AUTHORS.md index 96a2c2cdf..1b6250510 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -2,6 +2,7 @@ - R. Heilemann Myhre (Met Norway) - M. Lange (ECMWF) +- J. Legaux (CERFACS) - O. Marsden (ECMWF) - A. Nawab (ECMWF) - B. Reuter (ECMWF) diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index b50e243e4..831ce34ba 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -182,7 +182,7 @@ endmacro() # [OMNI_INCLUDE [ ...]] # [XMOD [ ...]] # [REMOVE_OPENMP] [DATA_OFFLOAD] [GLOBAL_VAR_OFFLOAD] -# [TRIM_VECTOR_SECTIONS] [REMOVE_DERIVED_ARGS] +# [TRIM_VECTOR_SECTIONS] [REMOVE_DERIVED_ARGS] [INLINE_MEMBERS] # ) # # Call ``loki-transform.py convert ...`` with the provided arguments. @@ -199,7 +199,10 @@ endmacro() function( loki_transform_convert ) - set( options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS ) + set( + options CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR GLOBAL_VAR_OFFLOAD + TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS INLINE_MEMBERS + ) set( oneValueArgs MODE DIRECTIVE FRONTEND CONFIG PATH OUTPATH ) set( multiValueArgs OUTPUT DEPENDS INCLUDES INCLUDE HEADERS HEADER DEFINITIONS DEFINE OMNI_INCLUDE XMOD ) @@ -252,6 +255,10 @@ function( loki_transform_convert ) list( APPEND _ARGS --remove-derived-args ) endif() + if( ${_PAR_INLINE_MEMBERS} ) + list( APPEND _ARGS --inline-members ) + endif() + _loki_transform_env_setup() add_custom_command( @@ -588,6 +595,7 @@ endfunction() # [DIRECTIVE ] # [CPP] # [FRONTEND ] +# [INLINE_MEMBERS] # [BUILDDIR ] # [SOURCES [ ...]] # [HEADERS [ ...]] @@ -607,7 +615,7 @@ endfunction() function( loki_transform_command ) - set( options CPP ) + set( options CPP INLINE_MEMBERS ) set( oneValueArgs COMMAND MODE DIRECTIVE FRONTEND CONFIG BUILDDIR ) set( multiValueArgs OUTPUT DEPENDS SOURCES HEADERS ) @@ -731,7 +739,7 @@ endfunction() function( loki_transform_target ) - set( options NO_PLAN_SOURCEDIR COPY_UNMODIFIED CPP 
CPP_PLAN ) + set( options NO_PLAN_SOURCEDIR COPY_UNMODIFIED CPP CPP_PLAN INLINE_MEMBERS ) set( single_value_args TARGET COMMAND MODE DIRECTIVE FRONTEND CONFIG PLAN ) set( multi_value_args SOURCES HEADERS ) @@ -794,6 +802,10 @@ function( loki_transform_target ) list( APPEND _TRANSFORM_OPTIONS CPP ) endif() + if( _PAR_INLINE_MEMBERS ) + list( APPEND _TRANSFORM_OPTIONS INLINE_MEMBERS ) + endif() + loki_transform_command( COMMAND ${_PAR_COMMAND} OUTPUT ${LOKI_SOURCES_TO_APPEND} diff --git a/loki/expression/symbols.py b/loki/expression/symbols.py index df1d9fd9c..568363b2f 100644 --- a/loki/expression/symbols.py +++ b/loki/expression/symbols.py @@ -1069,7 +1069,7 @@ class LogicLiteral(StrCompareMixin, _Literal): """ def __init__(self, value, **kwargs): - self.value = value.lower() in ('true', '.true.') + self.value = str(value).lower() in ('true', '.true.') super().__init__(**kwargs) init_arg_names = ('value', ) diff --git a/loki/ir.py b/loki/ir.py index 134bdf5be..323a4e3c5 100644 --- a/loki/ir.py +++ b/loki/ir.py @@ -921,6 +921,14 @@ def arg_iter(self): kwargs = ((r_args[kw], arg) for kw, arg in as_tuple(self.kwarguments)) return chain(args, kwargs) + @property + def arg_map(self): + """ + A full map of all qualified argument matches from arguments + and keyword arguments. 
+ """ + return dict(self.arg_iter()) + @dataclass_strict(frozen=True) class _AllocationBase(): diff --git a/loki/transform/transform_inline.py b/loki/transform/transform_inline.py index 001016fb6..dd7085304 100644 --- a/loki/transform/transform_inline.py +++ b/loki/transform/transform_inline.py @@ -14,13 +14,18 @@ FindVariables, FindInlineCalls, FindLiterals, SubstituteExpressions, LokiIdentityMapper ) -from loki.ir import Import, Comment, Assignment +from loki.ir import Import, Comment, Assignment, VariableDeclaration, CallStatement from loki.expression import symbols as sym from loki.types import BasicType from loki.visitors import Transformer, FindNodes +from loki.tools import as_tuple +from loki.logging import warning, error -__all__ = ['inline_constant_parameters', 'inline_elemental_functions'] +__all__ = [ + 'inline_constant_parameters', 'inline_elemental_functions', + 'inline_member_procedures' +] class InlineSubstitutionMapper(LokiIdentityMapper): @@ -183,3 +188,132 @@ def inline_elemental_functions(routine): if all(hasattr(s, 'type') and s.type.dtype in removed_functions for s in im.symbols): import_map[im] = None routine.spec = Transformer(import_map).visit(routine.spec) + + +def inline_member_routine(routine, member): + """ + Inline an individual member :any:`Subroutine` at source level. + + This will replace all :any:`Call` objects to the specified + subroutine with an adjusted equivalent of the member routines' + body. For this, argument matching, including partial dimension + matching for array references is performed, and all + member-specific declarations are hoisted to the containing + :any:`Subroutine`. 
+ + Parameters + ---------- + routine : :any:`Subroutine` + The subroutine in which to inline all calls to the member routine + member : :any:`Subroutine` + The contained member subroutine to be inlined in the parent + """ + # pylint: disable=import-outside-toplevel,cyclic-import + from loki.transform import recursive_expression_map_update + + def _map_unbound_dims(var, val): + """ + Maps all unbound dimension ranges in the passed array value + ``val`` with the indices from the local variable ``var``. It + returns the re-mapped symbol. + + For example, mapping the passed array ``m(:,j)`` to the local + expression ``a(i)`` yields ``m(i,j)``. + """ + new_dimensions = list(val.dimensions) + + indices = [index for index, dim in enumerate(val.dimensions) if isinstance(dim, sym.Range)] + + for index, dim in enumerate(var.dimensions): + new_dimensions[indices[index]] = dim + + return val.clone(dimensions=tuple(new_dimensions)) + + # Prevent shadowing of member variables by renaming them a priori + parent_variables = routine.variable_map + duplicate_locals = tuple( + v for v in member.variables + if v.name in parent_variables and v.name.lower() not in member._dummies + ) + shadow_mapper = SubstituteExpressions( + {v: v.clone(name=f'{member.name}_{v.name}') for v in duplicate_locals} + ) + member.spec = shadow_mapper.visit(member.spec) + member.body = shadow_mapper.visit(member.body) + + # Get local variable declarations and hoist them + decls = FindNodes(VariableDeclaration).visit(member.spec) + decls = tuple(d for d in decls if all(s.name.lower() not in routine._dummies for s in d.symbols)) + decls = tuple(d for d in decls if all(s not in routine.variables for s in d.symbols)) + routine.spec.append(decls) + + call_map = {} + for call in FindNodes(CallStatement).visit(routine.body): + if call.routine == member: + argmap = {} + member_vars = FindVariables().visit(member.body) + + # Match dimension indexes between the argument and the given value + # for all occurrences of 
the argument in the body + for arg, val in call.arg_map.items(): + if isinstance(arg, sym.Array): + # Resolve implicit dimension ranges of the passed value, + # eg. when passing a two-dimensional array `a` as `call(arg=a)` + # Check if val is a DeferredTypeSymbol, as it does not have a `dimensions` attribute + if not isinstance(val, sym.DeferredTypeSymbol) and val.dimensions: + qualified_value = val + else: + qualified_value = val.clone( + dimensions=tuple(sym.Range((None, None)) for _ in arg.shape) + ) + + # If sequence association (scalar-to-array argument passing) is used, + # we cannot determine the right re-mapped iteration space, so we bail here! + if not any(isinstance(d, sym.Range) for d in qualified_value.dimensions): + error( + '[Loki::TransformInline] Cannot find free dimension resolving ' + f' array argument for value "{qualified_value}"' + ) + raise RuntimeError('[Loki::TransformInline] Unable to resolve member subroutine call') + arg_vars = tuple(v for v in member_vars if v.name == arg.name) + argmap.update((v, _map_unbound_dims(v, qualified_value)) for v in arg_vars) + else: + argmap[arg] = val + + # Recursive update of the map in case of nested variables to map + argmap = recursive_expression_map_update(argmap, max_iterations=10) + + # Substitute argument calls into a copy of the body + member_body = SubstituteExpressions(argmap).visit(member.body.body) + + # Inline substituted body within a pair of marker comments + comment = Comment(f'! [Loki] inlined member subroutine: {member.name}') + c_line = Comment('! 
=========================================') + call_map[call] = (comment, c_line) + as_tuple(member_body) + (c_line, ) + + # Replace calls to member with the member's body + routine.body = Transformer(call_map).visit(routine.body) + # Can't use transformer to replace subroutine, so strip it manually + contains_body = tuple(n for n in routine.contains.body if not n == member) + routine.contains._update(body=contains_body) + + +def inline_member_procedures(routine): + """ + Inline all member subroutines contained in an individual :any:`Subroutine`. + + Please note that member functions are not yet supported! + + Parameters + ---------- + routine : :any:`Subroutine` + The subroutine in which to inline all member routines + """ + + # Run through all members and invoke individual inlining transforms + for member in routine.members: + if member.is_function: + # TODO: Implement for functions!!! + warning('[Loki::inline] Inlining member functions is not yet supported, only subroutines!') + else: + inline_member_routine(routine, member) diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 1286edd6a..f0c534905 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -102,10 +102,12 @@ def cli(debug): help="Generate offload instructions for global vars imported via 'USE' statements.") @click.option('--remove-derived-args/--no-remove-derived-args', default=False, help="Remove derived-type arguments and replace with canonical arguments") +@click.option('--inline-members/--no-inline-members', default=False, + help='Inline member functions for SCC-class transformations.') def convert( mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, - global_var_offload, remove_derived_args + global_var_offload, remove_derived_args, inline_members ): """ Batch-processing mode for Fortran-to-Fortran transformations that @@ -190,7 +192,9 @@ def 
convert( horizontal = scheduler.config.dimensions['horizontal'] vertical = scheduler.config.dimensions['vertical'] block_dim = scheduler.config.dimensions['block_dim'] - transformation = (SCCBaseTransformation(horizontal=horizontal, directive=directive),) + transformation = (SCCBaseTransformation( + horizontal=horizontal, directive=directive, inline_members=inline_members + ),) transformation += (SCCDevectorTransformation(horizontal=horizontal, trim_vector_sections=trim_vector_sections),) transformation += (SCCDemoteTransformation(horizontal=horizontal),) if not 'hoist' in mode: diff --git a/tests/test_expression.py b/tests/test_expression.py index 76c29fb64..622241a6d 100644 --- a/tests/test_expression.py +++ b/tests/test_expression.py @@ -988,6 +988,9 @@ def test_string_compare(): assert symbols.Literal('u') == 'u' assert symbols.Literal('u') != 'U' assert symbols.Literal('u') != u # The `Variable(name='u', ...) from above + assert symbols.Literal('.TrUe.') == 'true' + # Specific test for constructor checks + assert symbols.LogicLiteral(value=True) == 'true' @pytest.mark.skipif(not HAVE_FP, reason='Fparser not available') diff --git a/tests/test_transform_inline.py b/tests/test_transform_inline.py index e6061c0e4..16b4f9e96 100644 --- a/tests/test_transform_inline.py +++ b/tests/test_transform_inline.py @@ -7,12 +7,19 @@ from pathlib import Path import pytest +import numpy as np -from conftest import jit_compile_lib, available_frontends -from loki import Builder, Module, Subroutine, FindNodes, Import, FindVariables +from conftest import jit_compile, jit_compile_lib, available_frontends +from loki import ( + Builder, Module, Subroutine, FindNodes, Import, FindVariables, + CallStatement, Loop, BasicType, DerivedType, OMNI +) from loki.ir import Assignment -from loki.transform import inline_elemental_functions, inline_constant_parameters, replace_selected_kind - +from loki.transform import ( + inline_elemental_functions, inline_constant_parameters, + 
replace_selected_kind, inline_member_procedures +) +from loki.expression import symbols as sym @pytest.fixture(scope='module', name='here') def fixture_here(): @@ -281,3 +288,261 @@ def test_constant_replacement_internal(frontend): stmts = FindNodes(Assignment).visit(routine.body) assert len(stmts) == 1 assert stmts[0].rhs == 'b + 10' + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_inline_member_routines(here, frontend): + """ + Test inlining of member subroutines. + """ + fcode = """ +subroutine member_routines(a, b) + real(kind=8), intent(inout) :: a(3), b(3) + integer :: i + + do i=1, size(a) + call add_one(a(i)) + end do + + call add_to_a(b) + + do i=1, size(a) + call add_one(a(i)) + end do + + contains + + subroutine add_one(a) + real(kind=8), intent(inout) :: a + a = a + 1 + end subroutine + + subroutine add_to_a(b) + real(kind=8), intent(inout) :: b(:) + integer :: n + + n = size(a) + do i = 1, n + a(i) = a(i) + b(i) + end do + end subroutine +end subroutine member_routines + """ + routine = Subroutine.from_source(fcode, frontend=frontend) + + filepath = here/(f'ref_transform_inline_member_routines_{frontend}.f90') + reference = jit_compile(routine, filepath=filepath, objname='member_routines') + + a = np.array([1., 2., 3.], order='F') + b = np.array([3., 3., 3.], order='F') + reference(a, b) + + assert (a == [6., 7., 8.]).all() + assert (b == [3., 3., 3.]).all() + + # Now inline the member routines and check again + inline_member_procedures(routine=routine) + + assert not routine.members + assert not FindNodes(CallStatement).visit(routine.body) + assert len(FindNodes(Loop).visit(routine.body)) == 3 + assert 'n' in routine.variables + + # And verify compiled behaviour + filepath = here/(f'transform_inline_member_routines_{frontend}.f90') + function = jit_compile(routine, filepath=filepath, objname='member_routines') + + a = np.array([1., 2., 3.], order='F') + b = np.array([3., 3., 3.], order='F') + function(a, b) + + assert (a == 
[6., 7., 8.]).all() + assert (b == [3., 3., 3.]).all() + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_inline_member_routines_arg_dimensions(frontend): + """ + Test inlining of member subroutines when sub-arrays of rank less + than the formal argument are passed. + """ + fcode = """ +subroutine member_routines_arg_dimensions(matrix, tensor) + real(kind=8), intent(inout) :: matrix(3, 3), tensor(3, 3, 4) + integer :: i + do i=1, 3 + call add_one(3, matrix(1:3,i), tensor(:,i,:)) + end do + contains + subroutine add_one(n, a, b) + integer, intent(in) :: n + real(kind=8), intent(inout) :: a(3), b(3,1:n) + integer :: j + do j=1, n + a(j) = a(j) + 1 + b(j,:) = 66.6 + end do + end subroutine +end subroutine member_routines_arg_dimensions + """ + routine = Subroutine.from_source(fcode, frontend=frontend) + + # Ensure initial member arguments + assert len(routine.routines) == 1 + assert routine.routines[0].name == 'add_one' + assert len(routine.routines[0].arguments) == 3 + assert routine.routines[0].arguments[0].name == 'n' + assert routine.routines[0].arguments[1].name == 'a' + assert routine.routines[0].arguments[2].name == 'b' + + # Now inline the member routines and check again + inline_member_procedures(routine=routine) + + # Ensure member has been inlined and arguments adapted + assert len(routine.routines) == 0 + assert len([v for v in FindVariables().visit(routine.body) if v.name == 'a']) == 0 + assigns = FindNodes(Assignment).visit(routine.body) + assert len(assigns) == 2 + assert assigns[0].lhs == 'matrix(j, i)' and assigns[0].rhs =='matrix(j, i) + 1' + assert assigns[1].lhs == 'tensor(j, i, :)' + + # Ensure the `n` in the inner loop bound has been substituted too + loops = FindNodes(Loop).visit(routine.body) + assert len(loops) == 2 + assert loops[0].bounds == '1:3' + assert loops[1].bounds == '1:3' + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, 'No header information in test')])) +def 
test_inline_member_routines_derived_type_member(frontend): + """ + Test inlining of member subroutines when the member routine + handles arrays that are derived type components and thus might + have the DEFERRED type. + """ + fcode = """ +subroutine outer(x, a) + real, intent(inout) :: x + type(my_type), intent(in) :: a + + ! Pass derived type arrays as arguments + call inner(a%b(:), a%c, a%k, a%n) + +contains + subroutine inner(y, z, k, n) + integer, intent(in) :: k, n + real, intent(inout) :: y(n), z(:,:) + integer :: j + + do j=1, n + x = x + y(j) + ! Use derived-type variable as index + ! to test for nested substitution + y(j) = z(k,j) + end do + end subroutine inner +end subroutine outer + """ + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert routine.variable_map['x'].type.dtype == BasicType.REAL + assert isinstance(routine.variable_map['a'].type.dtype, DerivedType) + call = FindNodes(CallStatement).visit(routine.body)[0] + assert isinstance(call.arguments[0], sym.Array) + assert isinstance(call.arguments[1], sym.DeferredTypeSymbol) + assert isinstance(call.arguments[2], sym.DeferredTypeSymbol) + + # Now inline the member routines and check again + inline_member_procedures(routine=routine) + + assigns = FindNodes(Assignment).visit(routine.body) + assert len(assigns) == 2 + assert assigns[0].rhs =='x + a%b(j)' + assert assigns[1].lhs == 'a%b(j)' and assigns[1].rhs == 'a%c(a%k, j)' + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_inline_member_routines_variable_shadowing(frontend): + """ + Test inlining of member subroutines when variable allocations + in child routine shadow different allocations in the parent. + """ + fcode = """ +subroutine outer() + real :: x = 3 ! 'x' is real in outer. + real :: tmp = 0 + real :: y + + y = 1.0 + call inner(tmp, y=y) + x = x + tmp + +contains + subroutine inner(y) + real, intent(inout) :: Y + real :: x(3) ! 'x' is array in inner. 
+ x = [1, 2, 3] + y = y + sum(x) + end subroutine inner +end subroutine outer + """ + routine = Subroutine.from_source(fcode, frontend=frontend) + + # Check outer and inner 'x' + assert routine.variable_map['x'] == 'x' + assert isinstance(routine.variable_map['x'], sym.Scalar) + assert routine.variable_map['x'].type.initial == 3 + + assert routine['inner'].variable_map['x'] in ['x(3)', 'x(1:3)'] + assert isinstance(routine['inner'].variable_map['x'], sym.Array) + assert routine['inner'].variable_map['x'].type.shape == (3,) + + inline_member_procedures(routine=routine) + + # Check outer has not changed + assert routine.variable_map['x'] == 'x' + assert isinstance(routine.variable_map['x'], sym.Scalar) + assert routine.variable_map['x'].type.initial == 3 + + # Check inner 'x' was moved correctly + assert routine.variable_map['inner_x'] in ['inner_x(3)', 'inner_x(1:3)'] + assert isinstance(routine.variable_map['inner_x'], sym.Array) + assert routine.variable_map['inner_x'].type.shape == (3,) + + # Check inner 'y' was substituted, not renamed! + assign = FindNodes(Assignment).visit(routine.body) + assert routine.variable_map['y'] == 'y' + assert assign[2].lhs == 'y' and assign[2].rhs == 'y + sum(x)' + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_inline_member_routines_sequence_assoc(frontend): + """ + Test inlining of member subroutines in the presence of sequence + associations. As this is not supported, we check for the + appropriate error. 
+ """ + fcode = """ +subroutine member_routines_sequence_assoc(vector) + real(kind=8), intent(inout) :: vector(6) + integer :: i + + i = 2 + call inner(3, vector(i)) + + contains + subroutine inner(n, a) + integer, intent(in) :: n + real(kind=8), intent(inout) :: a(3) + integer :: j + do j=1, n + a(j) = a(j) + 1 + end do + end subroutine +end subroutine member_routines_sequence_assoc + """ + routine = Subroutine.from_source(fcode, frontend=frontend) + + # Expect to fail here due to use of sequence association + with pytest.raises(RuntimeError): + inline_member_procedures(routine=routine) diff --git a/transformations/transformations/single_column_coalesced.py b/transformations/transformations/single_column_coalesced.py index 0fd3d0949..ed805b70f 100644 --- a/transformations/transformations/single_column_coalesced.py +++ b/transformations/transformations/single_column_coalesced.py @@ -7,11 +7,13 @@ import re from loki.expression import symbols as sym +from loki.transform import resolve_associates, inline_member_procedures from loki import ( Transformation, FindNodes, FindScopes, Transformer, info, - pragmas_attached, as_tuple, flatten, ir, resolve_associates, - FindExpressions, SymbolAttributes, BasicType, SubstituteExpressions, DerivedType, - FindVariables, CaseInsensitiveDict, pragma_regions_attached, PragmaRegion, is_loki_pragma + pragmas_attached, as_tuple, flatten, ir, FindExpressions, + SymbolAttributes, BasicType, SubstituteExpressions, DerivedType, + FindVariables, CaseInsensitiveDict, pragma_regions_attached, + PragmaRegion, is_loki_pragma ) __all__ = ['SCCBaseTransformation', 'SCCAnnotateTransformation', 'SCCHoistTransformation'] @@ -31,14 +33,18 @@ class methods can be called directly. directive : string or None Directives flavour to use for parallelism annotations; either ``'openacc'`` or ``None``. + inline_members : bool + Enable full source-inlining of member subroutines; default: False. 
""" - def __init__(self, horizontal, directive=None): + def __init__(self, horizontal, directive=None, inline_members=False): self.horizontal = horizontal assert directive in [None, 'openacc'] self.directive = directive + self.inline_members = inline_members + @classmethod def check_routine_pragmas(cls, routine, directive): """ @@ -229,6 +235,10 @@ def process_kernel(self, routine): # Find the iteration index variable for the specified horizontal v_index = self.get_integer_variable(routine, name=self.horizontal.index) + # Perform full source-inlining for member subroutines if so requested + if self.inline_members: + inline_member_procedures(routine) + # Associates at the highest level, so they don't interfere # with the sections we need to do for detecting subroutine calls resolve_associates(routine)