Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCC: Support for bounds aliases and derived type members as bounds #250

Merged
merged 12 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion loki/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,27 @@ class Dimension:
String representations of alternative size variables that are
used to define array shapes of this dimension (e.g. alternative
names used in "driver" subroutines).
bounds_aliases : list or tuple of strings
String representations of alternative bounds variables that are
used to define loop ranges.
Comment on lines +34 to +36
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really great, I was missing that functionality (and hacked around in the Fortran instead... 😬 )

"""

def __init__(self, name=None, index=None, bounds=None, size=None, aliases=None):
def __init__(self, name=None, index=None, bounds=None, size=None, aliases=None,
             bounds_aliases=None):
    """
    Store the dimension attributes and validate the bounds aliases.

    Raises
    ------
    RuntimeError
        If ``bounds_aliases`` is given but does not hold exactly two
        entries, or if the parts of the two entries before the first
        ``%`` differ.
    """
    # Check the aliases first: they must come as a (start, end) pair
    # whose names share the same root before the first '%'
    if bounds_aliases:
        if len(bounds_aliases) != 2:
            raise RuntimeError(f'Start and end both needed for horizontal bounds aliases in {name}')
        start_root = bounds_aliases[0].partition('%')[0]
        end_root = bounds_aliases[1].partition('%')[0]
        if start_root != end_root:
            raise RuntimeError(f'Inconsistent root name for horizontal bounds aliases in {name}')

    self.name = name
    self._index = index
    self._size = size
    self._bounds = as_tuple(bounds)
    self._aliases = as_tuple(aliases)
    self._bounds_aliases = as_tuple(bounds_aliases)

def __repr__(self):
""" Pretty-print dimension details """
name = f'<{self.name}>' if self.name else ''
Expand Down Expand Up @@ -94,3 +106,15 @@ def size_expressions(self):
if self._bounds:
exprs += (f'{self._bounds[1]} - {self._bounds[0]} + 1', )
return exprs

@property
def bounds_expressions(self):
    """
    Grouped expression strings representing the bounds of a data space.

    One group is built per entry of ``self.bounds``. Without bounds
    aliases each group holds only that entry; with bounds aliases the
    alias at the same position is added after it. The groups are passed
    through ``as_tuple`` before being returned.
    """
    aliases = self._bounds_aliases
    if aliases:
        # zip() stops at the shorter of the two sequences
        groups = [(bound, alias) for bound, alias in zip(self.bounds, aliases)]
    else:
        groups = [(bound,) for bound in self.bounds]

    return as_tuple(groups)
2 changes: 2 additions & 0 deletions loki/expression/tests/test_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,8 @@ def test_typebound_resolution_type_info(frontend):
for var_name in var_tt_to_try:
assert f'var_tt%{var_name}' not in sub.symbol_attrs

assert 'var_c%c_b%b_a%a' == sub.resolve_typebound_var('var_c%c_b%b_a%a')

# Create each derived type member and verify its type
for var_name, dtype in var_c_to_try.items():
var = var_c.get_derived_type_member(var_name)
Expand Down
20 changes: 20 additions & 0 deletions loki/program_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,3 +798,23 @@ def apply(self, op, **kwargs):
"""
# TODO: Should type-check for an `Operation` object here
op.apply(self, **kwargs)

def resolve_typebound_var(self, name, variable_map=None):
    """
    A small convenience utility to resolve type-bound variables.

    Parameters
    ----------
    name : str
        The full name of the variable to be resolved, e.g., a%b%c%d.
    variable_map : dict, optional
        A map of the variables defined in the current scope. Only if
        this is ``None`` is ``self.variable_map`` used instead; a map
        that is passed explicitly is honoured even when it is empty.

    Returns
    -------
    The entry stored in the map under the root name (the part of
    ``name`` before the first ``%``). If ``name`` has further
    components, the result of calling ``get_derived_type_member`` on
    that entry with the remainder of the name. If the root name is not
    in the map, ``None`` is returned.
    """

    # Test for None rather than truthiness, so that an explicitly given
    # (but empty) map is not silently replaced by the routine's own map
    if variable_map is None:
        variable_map = self.variable_map

    root, separator, members = name.partition('%')
    var = variable_map.get(root, None)
    if var and separator:
        # Only the root is looked up here; the remaining components are
        # handed to the derived-type variable itself
        var = var.get_derived_type_member(members)
    return var
7 changes: 7 additions & 0 deletions loki/tests/test_dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,10 @@ def test_dimension_index_range(frontend):
assert FindNodes(Loop).visit(routine.body)[0].bounds == dim.range
assert FindNodes(Loop).visit(routine.body)[0].bounds.lower == dim.bounds[0]
assert FindNodes(Loop).visit(routine.body)[0].bounds.upper == dim.bounds[1]

# Test the correct creation of horizontal dim with aliased bounds vars
_ = Dimension('test_dim_alias', bounds_aliases=('bnds%start', 'bnds%end'))
with pytest.raises(RuntimeError):
_ = Dimension('test_dim_alias', bounds_aliases=('bnds%start',))
with pytest.raises(RuntimeError):
_ = Dimension('test_dim_alias', bounds_aliases=('bnds%start', 'some_other_bnds%end'))
182 changes: 170 additions & 12 deletions transformations/tests/test_single_column_coalesced.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,14 @@
def fixture_horizontal():
return Dimension(name='horizontal', size='nlon', index='jl', bounds=('start', 'end'), aliases=('nproma',))

@pytest.fixture(scope='module', name='horizontal_bounds_aliases')
def fixture_horizontal_bounds_aliases():
    """
    Horizontal dimension that additionally declares ``bnds%start`` and
    ``bnds%end`` as aliases for its loop bounds.
    """
    dimension_args = {
        'name': 'horizontal_bounds_aliases',
        'size': 'nlon',
        'index': 'jl',
        'bounds': ('start', 'end'),
        'aliases': ('nproma',),
        'bounds_aliases': ('bnds%start', 'bnds%end'),
    }
    return Dimension(**dimension_args)

@pytest.fixture(scope='module', name='vertical')
def fixture_vertical():
return Dimension(name='vertical', size='nz', index='jk')
return Dimension(name='vertical', size='nz', index='jk', aliases=('nlev',))


@pytest.fixture(scope='module', name='blocking')
Expand Down Expand Up @@ -125,6 +129,107 @@ def test_scc_revector_transformation(frontend, horizontal):
assert kernel_calls[0].name == 'compute_column'


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_revector_transformation_aliased_bounds(frontend, horizontal_bounds_aliases):
"""
Test removal of vector loops in kernel and re-insertion of a single
hoisted horizontal loop in the kernel with aliased loop bounds.

The kernel receives its horizontal loop bounds as members of a
derived-type argument (``bnds%start``, ``bnds%end``), which match
the ``bounds_aliases`` of the dimension fixture. After the
transformations, the hoisted horizontal loop is expected to use
these derived-type members as its bounds.
"""

# Module defining the derived type that carries the loop bounds
fcode_bnds_type_mod = """
module bnds_type_mod
implicit none
type bnds_type
integer :: start
integer :: end
end type bnds_type
end module bnds_type_mod
"""

# Driver that sets bnds%start/bnds%end and calls the kernel in a block loop
fcode_driver = """
SUBROUTINE column_driver(nlon, nz, q, t, nb)
USE bnds_type_mod, only : bnds_type
INTEGER, INTENT(IN) :: nlon, nz, nb ! Size of the horizontal and vertical
REAL, INTENT(INOUT) :: t(nlon,nz,nb)
REAL, INTENT(INOUT) :: q(nlon,nz,nb)
INTEGER :: b, start, end
TYPE(bnds_type) :: bnds

bnds%start = 1
bnds%end = nlon
do b=1, nb
call compute_column(bnds, nlon, nz, q(:,:,b), t(:,:,b))
end do
END SUBROUTINE column_driver
"""

# Kernel whose horizontal loops run from bnds%start to bnds%end
fcode_kernel = """
SUBROUTINE compute_column(bnds, nlon, nz, q, t)
USE bnds_type_mod, only : bnds_type
TYPE(bnds_type), INTENT(IN) :: bnds
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
REAL, INTENT(INOUT) :: t(nlon,nz)
REAL, INTENT(INOUT) :: q(nlon,nz)
INTEGER :: jl, jk
REAL :: c

c = 5.345
DO jk = 2, nz
DO jl = bnds%start, bnds%end
t(jl, jk) = c * jk
q(jl, jk) = q(jl, jk-1) + t(jl, jk) * c
END DO
END DO

! The scaling is purposefully upper-cased
DO JL = BNDS%START, BNDS%END
Q(JL, NZ) = Q(JL, NZ) * C
END DO
END SUBROUTINE compute_column
"""
# Parse the type module first; its definitions are passed on when
# parsing the kernel and the driver
bnds_type_mod = Sourcefile.from_source(fcode_bnds_type_mod, frontend=frontend)
kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend,
definitions=bnds_type_mod.definitions).subroutines[0]
driver = Sourcefile.from_source(fcode_driver, frontend=frontend,
definitions=bnds_type_mod.definitions).subroutines[0]

# Ensure we have three loops in the kernel prior to transformation
kernel_loops = FindNodes(Loop).visit(kernel.body)
assert len(kernel_loops) == 3

# Devectorise first, then revectorise; each transformation is applied
# to the driver and then to the kernel
scc_transform = (SCCDevectorTransformation(horizontal=horizontal_bounds_aliases),)
scc_transform += (SCCRevectorTransformation(horizontal=horizontal_bounds_aliases),)
for transform in scc_transform:
transform.apply(driver, role='driver')
transform.apply(kernel, role='kernel')

# Ensure we have two nested loops in the kernel
# (the hoisted horizontal and the native vertical)
kernel_loops = FindNodes(Loop).visit(kernel.body)
assert len(kernel_loops) == 2
assert kernel_loops[1] in FindNodes(Loop).visit(kernel_loops[0].body)
assert kernel_loops[0].variable == 'jl'
assert kernel_loops[0].bounds == 'bnds%start:bnds%end'
assert kernel_loops[1].variable == 'jk'
assert kernel_loops[1].bounds == '2:nz'

# Ensure all expressions and array indices are unchanged
# (assigns[0] is not checked; presumably it is the `c = 5.345` statement)
assigns = FindNodes(Assignment).visit(kernel.body)
assert fgen(assigns[1]).lower() == 't(jl, jk) = c*jk'
assert fgen(assigns[2]).lower() == 'q(jl, jk) = q(jl, jk - 1) + t(jl, jk)*c'
assert fgen(assigns[3]).lower() == 'q(jl, nz) = q(jl, nz)*c'

# Ensure driver remains unaffected
driver_loops = FindNodes(Loop).visit(driver.body)
assert len(driver_loops) == 1
assert driver_loops[0].variable == 'b'
assert driver_loops[0].bounds == '1:nb'

# The block loop must still contain exactly one call to the kernel
kernel_calls = FindNodes(CallStatement).visit(driver_loops[0])
assert len(kernel_calls) == 1
assert kernel_calls[0].name == 'compute_column'


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_base_resolve_vector_notation(frontend, horizontal):
"""
Expand Down Expand Up @@ -236,14 +341,16 @@ def test_scc_demote_transformation(frontend, horizontal):
"""

fcode_kernel = """
SUBROUTINE compute_column(start, end, nlon, nz, q)
SUBROUTINE compute_column(start, end, nlon, nproma, nz, q)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma ! Horizontal size alias
REAL, INTENT(INOUT) :: q(nlon,nz)
REAL :: t(nlon,nz)
REAL :: a(nlon)
REAL :: a(nproma)
REAL :: b(nlon,psize)
REAL :: unused(nlon)
REAL :: d(nlon,psize)
INTEGER, PARAMETER :: psize = 3
INTEGER :: jl, jk
REAL :: c
Expand All @@ -263,18 +370,24 @@ def test_scc_demote_transformation(frontend, horizontal):
b(jl, 2) = Q(JL, 3)
b(jl, 3) = a(jl) * (b(jl, 1) + b(jl, 2))

d(jl, 1) = b(jl, 1)
d(jl, 2) = b(jl, 2)
d(jl, 3) = b(jl, 3)

Q(JL, NZ) = Q(JL, NZ) * C + b(jl, 3)
END DO
END SUBROUTINE compute_column
"""
kernel = Subroutine.from_source(fcode_kernel, frontend=frontend)
kernel_source = Sourcefile.from_source(fcode_kernel, frontend=frontend)
kernel_item = ProcedureItem(name='#compute_column', source=kernel_source, config={'preserve_arrays': ['d',]})
kernel = kernel_source.subroutines[0]

# Must run SCCDevector first because demotion relies on knowledge
# of vector sections
scc_transform = (SCCDevectorTransformation(horizontal=horizontal),)
scc_transform += (SCCDemoteTransformation(horizontal=horizontal),)
for transform in scc_transform:
transform.apply(kernel, role='kernel')
transform.apply(kernel, role='kernel', item=kernel_item)

# Ensure correct array variables shapes
assert isinstance(kernel.variable_map['a'], Scalar)
Expand All @@ -283,6 +396,7 @@ def test_scc_demote_transformation(frontend, horizontal):
assert isinstance(kernel.variable_map['t'], Array)
assert isinstance(kernel.variable_map['q'], Array)
assert isinstance(kernel.variable_map['unused'], Scalar)
assert isinstance(kernel.variable_map['d'], Array)

# Ensure that parameter-sized array b got demoted only
assert kernel.variable_map['b'].shape == ((3,) if frontend is OMNI else ('psize',))
Expand All @@ -297,7 +411,10 @@ def test_scc_demote_transformation(frontend, horizontal):
assert fgen(assigns[4]).lower() == 'b(1) = q(jl, 2)'
assert fgen(assigns[5]).lower() == 'b(2) = q(jl, 3)'
assert fgen(assigns[6]).lower() == 'b(3) = a*(b(1) + b(2))'
assert fgen(assigns[7]).lower() == 'q(jl, nz) = q(jl, nz)*c + b(3)'
assert fgen(assigns[7]).lower() == 'd(jl, 1) = b(1)'
assert fgen(assigns[8]).lower() == 'd(jl, 2) = b(2)'
assert fgen(assigns[9]).lower() == 'd(jl, 3) = b(3)'
assert fgen(assigns[10]).lower() == 'q(jl, nz) = q(jl, nz)*c + b(3)'


@pytest.mark.parametrize('frontend', available_frontends())
Expand Down Expand Up @@ -614,26 +731,30 @@ def test_scc_annotate_openacc(frontend, horizontal, blocking):
"""

fcode_driver = """
SUBROUTINE column_driver(nlon, nz, q, nb)
SUBROUTINE column_driver(nlon, nproma, nlev, nz, q, nb)
INTEGER, INTENT(IN) :: nlon, nz, nb ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma, nlev ! Aliases of horizontal and vertical sizes
REAL, INTENT(INOUT) :: q(nlon,nz,nb)
INTEGER :: b, start, end

start = 1
end = nlon
do b=1, nb
call compute_column(start, end, nlon, nz, q(:,:,b))
call compute_column(start, end, nlon, nproma, nz, q(:,:,b))
end do
END SUBROUTINE column_driver
"""

fcode_kernel = """
SUBROUTINE compute_column(start, end, nlon, nz, q)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
SUBROUTINE compute_column(start, end, nlon, nproma, nlev, nz, q)
INTEGER, INTENT(IN) :: start, end ! Iteration indices
INTEGER, INTENT(IN) :: nlon, nz ! Size of the horizontal and vertical
INTEGER, INTENT(IN) :: nproma, nlev ! Aliases of horizontal and vertical sizes
REAL, INTENT(INOUT) :: q(nlon,nz)
REAL :: t(nlon,nz)
REAL :: a(nlon)
REAL :: d(nproma)
REAL :: e(nlev)
REAL :: b(nlon,psize)
INTEGER, PARAMETER :: psize = 3
INTEGER :: jl, jk
Expand Down Expand Up @@ -1696,11 +1817,19 @@ def test_single_column_coalesced_demotion_parameter(frontend, horizontal):


@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_base_horizontal_bounds_checks(frontend, horizontal):
def test_scc_base_horizontal_bounds_checks(frontend, horizontal, horizontal_bounds_aliases):
"""
Test the SCCBaseTransformation checks for horizontal loop bounds.
"""

fcode = """
subroutine kernel(start, end, work)
real, intent(inout) :: work
integer, intent(in) :: start, end

end subroutine kernel
"""

fcode_no_start = """
subroutine kernel(end, work)
real, intent(inout) :: work
Expand All @@ -1717,15 +1846,44 @@ def test_scc_base_horizontal_bounds_checks(frontend, horizontal):
end subroutine kernel
"""

fcode_alias = """
module bnds_type_mod
implicit none
type bnds_type
integer :: start
integer :: end
end type bnds_type
end module bnds_type_mod

subroutine kernel(bnds, work)
use bnds_type_mod, only : bnds_type
type(bnds_type), intent(in) :: bnds
real, intent(inout) :: work

end subroutine kernel
"""

routine = Subroutine.from_source(fcode, frontend=frontend)
no_start = Subroutine.from_source(fcode_no_start, frontend=frontend)
no_end = Subroutine.from_source(fcode_no_end, frontend=frontend)
alias = Sourcefile.from_source(fcode_alias, frontend=frontend).subroutines[0]

transform = SCCBaseTransformation(horizontal=horizontal)
with pytest.raises(RuntimeError):
transform.apply(no_start, role='kernel')
with pytest.raises(RuntimeError):
transform.apply(no_end, role='kernel')

transform = SCCBaseTransformation(horizontal=horizontal_bounds_aliases)
transform.apply(alias, role='kernel')

bounds = SCCBaseTransformation.get_horizontal_loop_bounds(routine, horizontal_bounds_aliases)
assert bounds[0] == 'start'
assert bounds[1] == 'end'

bounds = SCCBaseTransformation.get_horizontal_loop_bounds(alias, horizontal_bounds_aliases)
assert bounds[0] == 'bnds%start'
assert bounds[1] == 'bnds%end'

@pytest.mark.parametrize('frontend', available_frontends())
@pytest.mark.parametrize('trim_vector_sections', [False, True])
Expand Down
6 changes: 1 addition & 5 deletions transformations/transformations/pool_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,11 +435,7 @@ def _get_stack_storage_and_size_var(self, routine, stack_size):
)
variables_append += [stack_storage]

name_parts = self.block_dim.size.split('%', maxsplit=1)
block_size = routine.symbol_map[name_parts[0]]
if len(name_parts) > 1:
block_size = block_size.get_derived_type_member(name_parts[1])

block_size = routine.resolve_typebound_var(self.block_dim.size, routine.symbol_map)
stack_alloc = Allocation(variables=(stack_storage.clone(dimensions=( # pylint: disable=no-member
stack_size_var, block_size)),))
stack_dealloc = Deallocation(variables=(stack_storage.clone(dimensions=None),)) # pylint: disable=no-member
Expand Down
Loading
Loading